ELF>Hr@@      !"#$%&'()*+,;<-./0123456789:=>?@ABCDEFIJKLMNOPcdSTUVWXZ[\]^_`abefghijklmnopqrsuvwxz{|}~      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrs !"#$%&'()*+,-./01234CDMNOPQRSTWXtuvwxyz{|}~     56789:;<=>?@ABEFGHIJKLUVYZ[\]^_`abcdefghijklmnopstwUSHHHG@HSHHHH,fHnHEfHnHfHnflflKC K0C@H[]ff.AWAVAUATUSH(H|$HHD$H@H$HH$H@HD$HBHD$LxMMgMUIl$HLmMMuMiMNMtMIyLL$ZLL$Iy IA0IYH9IA0HpH|$@HI~ IF0I^H9t IF0Hp@LHInI|$ ID$0I\$H9tID$0Hp@LHtgI @LHrIDH} HE0H]H9t HE0Hp@HHmHI IG0I_H9t IG0Hp@LHtHIf@I} IE0I]H9t IE0Hp@LHTINL|$I IG0I_H9t.IG0HpHD$@LHtH\$L@HuL<$I IG0I_H9t,IG0HpH$@LHtH$gL@HuLt$I~ IF0I^H9t-IF0HpH$@LHtH\$L@HuH([]A\A]A^A_Ðff.ATIUHSHHt HL)HwD$?H$H9H|$HE1f.H ALcLsHkI0M|$? MIt fA>-I A<--IuID$L9I H-nostdinI9I sH-nostdinI9 fA>-U H5LaID$L9SH=G?D$?H] H9$#Lt$HH$ HD$HHD$ H$0H$HD$0H$*H$L$Ht$LHD$(HL$8I.I^ffo$Mn)$H$IHDŽ$AIFH9t-I?IGH9IGI HpI9uHtLHH)H$H$H9t9f.H}HEH9HEH HpH9uH$HtH$HH)H$0HtHt$HH$@HHD$ H$PH$HD$0H$H$LHLHD$H$XIFM.ffo$I^)$H$H$MHDŽ$IFAI9t*@I?IGH9IGI HpL9uMtH4$LL)H$L$L9t8fI}IEH9OIEI HpI9uL$MtH$LL)H$PHt HHЀ|$>L$P HHL$@HrIvI;v{HFHH$@L9 HH$PHFH$HHFIF HHL$@HrIvI;vHFHH$@L9z HH$PHFH$HHFIF HDŽ$HH HHxfHnHPfHnfl)$fo$p)$HHHH\$xfH$HDŽ$ChI^I)$HH)HH9 HH $I^H$IIfInLflH$)$H9t/IDIGIVLI IIvI H&L9uLD$HL$@HT$xLHt$(L$H$L$L9t6I>IFH9IFI HpL9uL$MtH$LL)H|$xHtHP@L$Mt!= AFPAV, LAƄLH$hdH+%(HxD[]A\A]A^A_DLH5L&LH5LLH5Lb IhMLH5LLSIL$L|H5LHL$HL$HKfI L9 ,fDH H9KmfDI L9fDI L9#DfDM |$? D$?fA~cXID$L9JH=>fDLHH$@L9:H$PHp$fDLHH$@L9H$PHpfDfA~c+A~ +ID$L9H=fID$vD$>H] H9$EH{t H8/AA~/IFMT$HDŽ$(HD$`HD$(H$ L$0LT$XAtH|$L$ Ƅ$0L$(HL)H ID$LT$XH$ L9HE$0H9 -IfB!H$(H$ HL$(BD H$(HH)H9. 
H$ L$1L9HE$0I9 HL$(B L$(HI9 H$ ID$L9HE$0H9 B!/H$(H$ BD H$(HH)L9w H$ N$L9HE$0I9B M[L$(B H$ H{L9m HSH$(H9vHCHSHKH$0HCHiH$ H$0HDŽ$(H$ L9tH$0HpH9$Lt$HH|$NfI L93TfDƄ$Lt$(L$H$HDŽ$HCLH$AtH$HH)I9 H$HL$(H9L$1HE$I9Ht"HH[Ht$PHH$L$HB'L$I9H$M|$H9HE$I9gB"/H$L$BD LCH$H$H H9M$0HE$I9Mt HILHH$L$B'H;H$HH9HKH$H9HHKHsH$HSHH$H$HDŽ$H$H9tH$HpH H9$E1fD1E1efDAfDH] H9$Lt$HIL$L9HHHL$@HrIvI;vHFHH$@L9HH$PHFH$HHFIF HHL$@HrIvI;v^HFHH$@L9aHH$PHFH$HHFIF fL=3H5LH|$ L$@9Ht$ HT$0HHH$HH$@LHHH@LMA8NAwCHHH$@L9H$PHpffo$PnfDfo$Pf fDL1H$fA1LLH IHL$PL1H$ ID$pfDL$PƄ$P1HDŽ$HLfo$PvHD$PH$H$UL9H$HtHHH$H;HSH$oLHH$@L9pH$PHpZLHH$@L9+H$PHp@ILP=NAF PAV ILPAF{LI HH@0H9LHHsH$HCH$H$HTfo$P~H=I WLHUL1LH$@A1LLH LL$LL$gLD$(HL$P1LH$@AF HH<0HHt$PHLT$XH$ LT$XH<0IHt$`LH$ }HD$PH$@dAH$@HCHKH$0HCL$ L$0LH|$A1LH LT$hHD$XLT$hHD$XIH|$HL$P1LT$XH$ LT$XHL$`H|$M1H$ H|$A1LH LT$hHD$XLT$hHD$XH;l$H$(HtHt{LH$(H{HSH$ $H$H;:HAFH$ HD$PH$ !$0H$(H{L H=@AWAVAUATUSHHT$(dH%(H$HHt$ H)H=IH!HG H|$ HD$0HIO(MW LgL)Hl$(HH?HH,HHLLuL]L9LHFHLLHL$8L\$L$L$L\$HL$8HD$ HXHpI9HIFHt,LHL$8LT$H4$H4$LT$HL$8MI)I9}HI9wDEH9HHFHLH $H $LLHʸL)H9}HH9ЅHD$ HXHpH9HHFHLL\$8HL$H4$H4$HL$L\$8WyHt$0LH\$ Lt$0M'MoInLLt$L9HFHt!I>Lty0I L)H9}HH9~։uHkLH]HuII9HIFHt%HH $H $ty2H LH)H9}HH9~ԉЅuLI9rCHT$(Ht$ LLL)H=H|$(+Lt$ LfDLHI @H)ٸH9}HH9ȅI9HIFHt'LtHLaMI)I9}HI9DEtMLMHD$huHHH?HHL9HD$HL$H$L;|$@| H$L$H$HD$@L$Ƅ$HDŽ$H$IEHL$H?HHMtoMLILHLHkL;I9HIFH0 LLD$ HL$H4$H4$HL$LD$  d IIMK(H;L;|$H+ H$fInfHnflH9 HSL;LcHCH H$H$HDŽ$H$H;|$HtH$HpH$H;|$@tH$HpH$H;|$P H$HpHl$X H|$p <H$dH+%(H[]A\A]A^A_ÅH9HHFH-H)ٸH9HH9,ȅHt$HHPHHPHHHD$HHH|$pMlH$HT$XHD$@H$I]MHD$PH$HD$HIEH|$`L9IIUIMMmH|$@H$H$IEAEH$H;D$P1H$H$HD$PHl$`H$HDŽ$H$HD$XƄ$H9 Ll$hIMMLLT$8LL$0LD$(Ht$ L$L$Ht$ LD$(LL$0LT$8MHLHHHMHIHkMM$L9M9M$ID$ML$HSIT$MLHCHCAL;|$XIMI\$L<HMGLLLKLHLLUHuM9LIFH LʸL)H9GHH9l Ѕ*I9Mt I(LLLLKM$ML$CL*M$ML$HCID$H+I LLl$hMIL9t$pu |$ HD$HH$H$H;L$@H$H$H$L$HD$@MFHDŽ$Ƅ$IL$H$L9t$`L,$LIMLILHHLLcL+I9IFH~HLLD$0LL$(HL$ HL$ LL$(LD$0M IL,$IMLK0H;H;L$HtH$fInfHnflH9cHSH LcHCHRH$H$HDŽ$H$H;|$HtH$HpH$H;|$@tH$HpH|$`H$Hl$`H;|$PtH$HpIEI 
I]ML9HD$@Ao}AEIMIEH$)$fo$)$LH)H9}HH9ЅLIL,$LHMLH;H;L$HMt"IHt$HLL$H;LcB'H$CALKM$@fo$H)$GIK,0LsH}M9L9LmHELeHSHUHvH;HCIWHCHH?HHL9|$`MH$MH$I DMZH CHD$HH$H$HD$HHH;|$PtH$HpLt$IGHD$`ILt$XMDH|$XHD$PH$HGH_H9H$HD$XHHH$HAHIH$IHAH;D$`iHAIFHAIFHHD$`IIFHD$@H$H$H;D$P H$H$H$H$L)HDŽ$Ƅ$H$HD$PH\$pH$HHHD$hHII?IIL|$8H@L|$`1MM@LLD$0L\$(HL$ Ht$L$L$Ht$HL$ L\$(LD$0MHLHIHLHHL{LH}M9L9LUHELmHSHUHH;HCHCL9t$8MLH]L4HINLILkLIMM_I7M9LIFHLL)H9MHH9+Ѕ0fH9WMt%ILLL$LkH}L$LmB/H;#@LULmHCHEL;LDHL)H9}HH9EЅLIMHLH;L;|$HMt"IHt$HLL$H;LcB'H$Hl$X H|$p MfCLkH}Dfo$I)$fo$)$fI9IVHtH)Ht$`HIVHD$XHPH@Ilf.oH)$)$IO$(LkI<$M9;H9kM<$ID$Il$HSIT$H]H;HCIVHCHH?HHMhLL$MH$I@LL;CHD$HH$H$HD$HH6HL$`LE1Ol-H;LHLHELeL9/HUH9#HHKHSHEHCHH}HMHELHyMILIL9Ht%H;HLL$HkI<$L$Il$/H;ILOt6H;MIMI$IL$H9IT$H9HHsHSID$HCHI<$It$ID$HL$L$H;MHHSHEHCLeLI9IT$Ht"HHH $IT$H;H $HSI<$nM<$Il$HCID$L+LMIHM$L$H;5AFIVH9 Mt'ILLLD$ LcH}LD$ LeB'H;H9HUHtHtULHUH;HSH}HD$`LmLeHCHEL3LCLcH}EHUH;CHkI<$HHSID$HCI $H%L$L$MLLPL L,$H$MLL$LAD$IT$H;?HfDAWAVAUATUSHHdH%(HD$81H FIHF<(F<oFPH~P)$HdHH\$ D$ M4$H\$L|$HD$H<$IHpXHLMjALH|$ Y^H9tHD$ HpHHt5Pt.HhHH\0 @H H9tHLuHD$8dH+%(wHH[]A\A]A^A_DA\$H4$AD$LHT$A\$tTHD$8dH+%(HH[]A\A]A^A_HD$8dH+%(pHH[]A\A]A^A_HLH,HD$8dH+%()HHHL[]A\A]A^A_HD$8dH+%( HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_Hv(yHD$8dH+%(HHHL[]A\A]A^A_<Hv(;HD$8dH+%(HHHL[]A\A]A^A_Hv@HUHHt HHHtL%HD$8dH+%(}HHHL[]A\A]A^A_ HD$8dH+%(zHH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%("HH[]A\A]A^A_Hv0rEHPHL,EHHL9uHI9H3LuHD$8dH+%(HH[]A\A]A^A_HD$8dH+%(1HH[]A\A]A^A_Hv(XHt HHHtL,HD$8dH+%(HHHL[]A\A]A^A_HF@H0HPHt HHHtLHD$8dH+%(pHHHL[]A\A]A^A_HD$8dH+%(mHH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_Hv(9HD$8dH+%(HHHL[]A\A]A^A_HF`HPHXLlL9uHI9H3LuoHv0^HD$8dH+%(HHHL[]A\A]A^A_A$HLEHPHL,EHHI9ufDHI9H3LuHD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_Hv8PHu0LHD$8dH+%( HH[]A\A]A^A_HD$8dH+%( HH[]A\A]A^A_HD$8dH+%( HHHL[]A\A]A^A_fHD$8dH+%( HH[]A\A]A^A_HF8HXLlL9u fDHI9 H3Lu_HD$8dH+%( HH[]A\A]A^A_HD$8dH+%( HH[]A\A]A^A_HD$8dH+%(G HH[]A\A]A^A_Hv@HUHHt HHHtL EHPHL,E HHI9unfDHI9[H3LuGHD$8dH+%(R 
HH[]A\A]A^A_HLEHPHL,E HHI9uDHI9H3LuHD$8dH+%( HH[]A\A]A^A_HD$8dH+%( HH[]A\A]A^A_Hv0@Hu8L,Ht HHHtLX HD$8dH+%(J HHHL[]A\A]A^A_ HD$8dH+%( HH[]A\A]A^A_HD$8dH+%( HH[]A\A]A^A_HD$8dH+%( HH[]A\A]A^A_Hv8MHt HHHtLy !HD$8dH+%( HHHL[]A\A]A^A_t Ht HHHtL HD$8dH+%(q HHHL[]A\A]A^A_ HvHHUPyHt HHHtL MECHPHL,E`HHL9ufHI9H3LuHvXHu`HtLHu0HD$8dH+%(HHL[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(NHH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_Hv0BHD$8dH+%(kHHHL[]A\A]A^A_ HLEHPHL,E$HHI9ubfHI9SH3Lu?HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(-HH[]A\A]A^A_Hv8Hu0LHt HHHtLHD$8dH+%( HHHL[]A\A]A^A_ fHD$8dH+%(+HHHL[]A\A]A^A_ HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HF@HSH0HPEHPHL,E>HHL9uHI9{H3LugZHD$8dH+%( HHHL[]A\A]A^A_HD$8dH+%(|HH[]A\A]A^A_HD$8dH+%(4HH[]A\A]A^A_HD$8dH+%(VHH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(;HH[]A\A]A^A_Hv8Hu0LHt HHHtLHD$8dH+%(sHHHL[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_Hv8?Hu@L+Ht HHHtLWHD$8dH+%(HHHL[]A\A]A^A_RHD$8dH+%(HH[]A\A]A^A_HD$8dH+%(HH[]A\A]A^A_HD$8dH+%(wHH[]A\A]A^A_DH<$H@(HwHH\$ D$ M4$H\$L|$HD$H<$IHHu0L>HD$8dH+%(HHHL[]A\A]A^A_9Hu0HD$8dH+%(BHHL[]A\A]A^A_Hv8u11B111s11v@AVAUIHATUSHIHqDHHtA uPIm<'w9Is1?HBIES>wHHtI}htmHLImH[Ht<CPvЍJۃsJImv<'yD[]A\A]A^f.HIEhLs0LHH8HGHH@&wDG&wXLw Gt%II>G&vHLw GuAtAEpIAFuHHtff.FtzAUIATUHHSHPHL$AEtWLH@HI9tH3HuH[]A\A]DH[]A\A]1fDFtzAUIATUHHSHPHL$AEtWLH@HI9tH3HuH[]A\A]DH[]A\A]1fDFtzAUIATUHHSHPHL$AEtWLH@HI9tH3HuH[]A\A]DH[]A\A]1fDAVIAUATUHSAĄu[D]A\A]A^f.AFtLPHL,AFt7LHHI9tH3HuE1[]DA\A]A^1HGHGX111111Hptt1H71f.H1tt1H71f.H1tt1H71f.H1F tG t F8Gp0HATLgH9tHLDLA\HATLgH9tHLDLA\HATLgH9tHLDLA\HHHUHHHp]SHH0VdH%(HD$(HH$HD$H|$HT$D$H{HT$(dH+%(uH0[HdH%(HD$1HHT$dH+%(uHSHGHHx u[1HCH@ HH[AVAUATUHHSo1LeHHHtHPHEPLhPHHMpHfCHC(LcLkC(HC0HC8HC@HCHCMIDžpH HXLpIpHIpHtHPHELh H@HLHHLfHEH@ @[]A\A]A^DIpHHHUHHH]UHSHHHHGHh H@HHHHHH{[]IHHLHH0pAVAUATUSLg@HGHI,I9t!H}HHHPI9uHk@HCPH9tHS4LC(tADc0Et8I1DI<(HtHtHHpLC(HI9uLCDkH{EtOI1DL$/Mt2It,I$I|$LpHtHPLLH{HI9u[]A\A]A^I9fH{[]A\A]A^SHHH{HC(H9tHs([H@[ATHUHHHHGL` 
HH@LHHH}HH]A\HSHHHHtHHH{`H;{XtH{@HPH9t [[HSHHHHtHHHHHHH9tHHpHHH9tHHpH{`H;{XtH{@HPH9t [[F(tG(t HWH;VtÐ1HtHHvHHAUIATUSHH_HoH9u lHH9t_LeMt=tuAD$PAT$uI$LP=t`AD$ PAT$ uI$HLPH9uImHt?Iu HH[H)]A\A]fDAD$fAD$ fH[]A\A]USHHoHtEH}H} HE0H]H9t HE0Hp@HHt HH[]basic_string::_M_construct null not validError: The file was not recognized as source code: fail to parse project option : %s: __pos (which is %zu) > this->size() (which is %zu)invalid project directory for : Could not load compilationdatabase. Please use the -b option to a path containing a compile_commands.json, or use '--' followed by the compilation commands.Cannot use both sources and '-a'No source files. Please pass source files as argument, or use '-a'You must specify a project name and directory with '-p name:directory'Sources: Skipping already processed Sources: Skipping file not included by any project NotInDB: Skipping already processed NotInDB: Skipping file not included by any project Warning: This file is not a C or C++ file. It does not have highlighting.Build path containing compilation database (compile_commands.json) If this argument is not passed, the compilation arguments can be passed on the command line after '--'* [-- ]Output directory where the generated files will be putProject specification: The name of the project, the absolute path of the source code, and the revision separated by colons. Example: -p projectname:/path/to/source/code:0.3betaReference to an external project. Example: -e clang/include/clang:/opt/llvm/include/clang/:https://code.woboq.org/llvmData url where all the javascript and css files are found. Can be absolute, or relative to the output directory. Defaults to ../dataProcess all files from the compile_commands.json. 
If this argument is passed, the list of sources does not need to be passed EXAMPLES: Simple generation without compile command or project (compile command specified inline) codebrowser_generator -o ~/public_html/code -d https://code.woboq.org/data $PWD -- -std=c++14 -I/opt/llvm/include With a project codebrowser_generator -b $PWD/build -a -p codebrowser:$PWD -o ~/public_html/code /builtins/__clang_cuda_builtin_vars.h/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CUDA_BUILTIN_VARS_H #define __CUDA_BUILTIN_VARS_H // Forward declares from vector_types.h. struct uint3; struct dim3; // The file implements built-in CUDA variables using __declspec(property). // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx // All read accesses of built-in variable fields get converted into calls to a // getter function which in turn calls the appropriate builtin to fetch the // value. // // Example: // int x = threadIdx.x; // IR output: // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 // PTX output: // mov.u32 %r2, %tid.x; #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ static inline __attribute__((always_inline)) \ __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ return INTRINSIC; \ } #if __cplusplus >= 201103L #define __DELETE =delete #else #define __DELETE #endif // Make sure nobody can create instances of the special variable types. nvcc // also disallows taking address of special variables, so we disable address-of // operator as well. 
#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ __attribute__((device)) TypeName() __DELETE; \ __attribute__((device)) TypeName(const TypeName &) __DELETE; \ __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ __attribute__((device)) TypeName *operator&() const __DELETE struct __cuda_builtin_threadIdx_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); }; struct __cuda_builtin_blockIdx_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); }; struct __cuda_builtin_blockDim_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. 
__attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); }; struct __cuda_builtin_gridDim_t { __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); }; #define __CUDA_BUILTIN_VAR \ extern const __attribute__((device)) __attribute__((weak)) __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; // warpSize should translate to read of %WARP_SZ but there's currently no // builtin to do so. According to PTX v4.2 docs 'to date, all target // architectures have a WARP_SZ value of 32'. __attribute__((device)) const int warpSize = 32; #undef __CUDA_DEVICE_BUILTIN #undef __CUDA_BUILTIN_VAR #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS #undef __DELETE #endif /* __CUDA_BUILTIN_VARS_H */ /builtins/__clang_cuda_cmath.h/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_CMATH_H__ #define __CLANG_CUDA_CMATH_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." 
#endif #ifndef __OPENMP_NVPTX__ #include #endif // CUDA lets us use various std math functions on the device side. This file // works in concert with __clang_cuda_math_forward_declares.h to make this work. // // Specifically, the forward-declares header declares __device__ overloads for // these functions in the global namespace, then pulls them into namespace std // with 'using' statements. Then this file implements those functions, after // their implementations have been pulled in. // // It's important that we declare the functions in the global namespace and pull // them into namespace std with using statements, as opposed to simply declaring // these functions in namespace std, because our device functions need to // overload the standard library functions, which may be declared in the global // namespace or in std, depending on the degree of conformance of the stdlib // implementation. Declaring in the global namespace and pulling into namespace // std covers all of the known knowns. 
#ifdef __OPENMP_NVPTX__ #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) #endif __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } __DEVICE__ float atan(float __x) { return ::atanf(__x); } __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } __DEVICE__ float ceil(float __x) { return ::ceilf(__x); } __DEVICE__ float cos(float __x) { return ::cosf(__x); } __DEVICE__ float cosh(float __x) { return ::coshf(__x); } __DEVICE__ float exp(float __x) { return ::expf(__x); } __DEVICE__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } __DEVICE__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } // For inscrutable reasons, the CUDA headers define these functions for us on // Windows. #if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__) // For OpenMP we work around some old system headers that have non-conforming // `isinf(float)` and `isnan(float)` implementations that return an `int`. We do // this by providing two versions of these functions, differing only in the // return type. To avoid conflicting definitions we disable implicit base // function generation. 
That means we will end up with two specializations, one // per type, but only one has a base function defined by the system header. #if defined(__OPENMP_NVPTX__) #pragma omp begin declare variant match( \ implementation = {extension(disable_implicit_base)}) // FIXME: We lack an extension to customize the mangling of the variants, e.g., // add a suffix. This means we would clash with the names of the variants // (note that we do not create implicit base functions here). To avoid // this clash we add a new trait to some of them that is always true // (this is LLVM after all ;)). It will only influence the mangled name // of the variants inside the inner region and avoid the clash. #pragma omp begin declare variant match(implementation = {vendor(llvm)}) __DEVICE__ int isinf(float __x) { return ::__isinff(__x); } __DEVICE__ int isinf(double __x) { return ::__isinf(__x); } __DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ int isnan(double __x) { return ::__isnan(__x); } #pragma omp end declare variant #endif __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } // For inscrutable reasons, __finite(), the double-precision version of // __finitef, does not exist when compiling for MacOS. __isfinited is available // everywhere and is just as good. 
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } #if defined(__OPENMP_NVPTX__) #pragma omp end declare variant #endif #endif __DEVICE__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ bool isgreater(double __x, double __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ bool isgreaterequal(float __x, float __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } __DEVICE__ bool isless(double __x, double __y) { return __builtin_isless(__x, __y); } __DEVICE__ bool islessequal(float __x, float __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ bool islessequal(double __x, double __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ bool islessgreater(float __x, float __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isunordered(float __x, float __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ bool isunordered(double __x, double __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ float ldexp(float __arg, int __exp) { return ::ldexpf(__arg, __exp); } __DEVICE__ float log(float __x) { return ::logf(__x); } __DEVICE__ float log10(float __x) { return ::log10f(__x); } __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } __DEVICE__ float pow(float __base, float __exp) { return ::powf(__base, __exp); } __DEVICE__ float pow(float __base, int __iexp) { return ::powif(__base, __iexp); } __DEVICE__ 
double pow(double __base, int __iexp) { return ::powi(__base, __iexp); } __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); } __DEVICE__ float sin(float __x) { return ::sinf(__x); } __DEVICE__ float sinh(float __x) { return ::sinhf(__x); } __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } __DEVICE__ float tan(float __x) { return ::tanf(__x); } __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } // There was a redefinition error for this this overload in CUDA mode. // We restrict it to OpenMP mode for now, that is where it is actually needed // anyway. #ifdef __OPENMP_NVPTX__ __DEVICE__ float remquo(float __n, float __d, int *__q) { return ::remquof(__n, __d, __q); } #endif // Notably missing above is nexttoward. We omit it because // libdevice doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. #ifndef __OPENMP_NVPTX__ // Now we've defined everything we promised we'd define in // __clang_cuda_math_forward_declares.h. We need to do two additional things to // fix up our math functions. // // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define // only sin(float) and sin(double), which means that e.g. sin(0) is // ambiguous. // // 2) Pull the __device__ overloads of "foobarf" math functions into namespace // std. These are defined in the CUDA headers in the global namespace, // independent of everything else we've done here. // We can't use std::enable_if, because we want to be pre-C++11 compatible. But // we go ahead and unconditionally define functions that are only available when // compiling for C++11 to match the behavior of the CUDA headers. template struct __clang_cuda_enable_if {}; template struct __clang_cuda_enable_if { typedef __T type; }; // Defines an overload of __fn that accepts one integral argument, calls // __fn((double)x), and returns __retty. 
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ template \ __DEVICE__ \ typename __clang_cuda_enable_if::is_integer, \ __retty>::type \ __fn(__T __x) { \ return ::__fn((double)__x); \ } // Defines an overload of __fn that accepts one two arithmetic arguments, calls // __fn((double)x, (double)y), and returns a double. // // Note this is different from OVERLOAD_1, which generates an overload that // accepts only *integral* arguments. #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ template \ __DEVICE__ typename __clang_cuda_enable_if< \ std::numeric_limits<__T1>::is_specialized && \ std::numeric_limits<__T2>::is_specialized, \ __retty>::type \ __fn(__T1 __x, __T2 __y) { \ return __fn((double)__x, (double)__y); \ } __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); 
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 // Overloads for functions that don't match the patterns expected by // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. 
template __DEVICE__ typename __clang_cuda_enable_if< std::numeric_limits<__T1>::is_specialized && std::numeric_limits<__T2>::is_specialized && std::numeric_limits<__T3>::is_specialized, double>::type fma(__T1 __x, __T2 __y, __T3 __z) { return std::fma((double)__x, (double)__y, (double)__z); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type frexp(__T __x, int *__exp) { return std::frexp((double)__x, __exp); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type ldexp(__T __x, int __exp) { return std::ldexp((double)__x, __exp); } template __DEVICE__ typename __clang_cuda_enable_if< std::numeric_limits<__T1>::is_specialized && std::numeric_limits<__T2>::is_specialized, double>::type remquo(__T1 __x, __T2 __y, int *__quo) { return std::remquo((double)__x, (double)__y, __quo); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type scalbln(__T __x, long __exp) { return std::scalbln((double)__x, __exp); } template __DEVICE__ typename __clang_cuda_enable_if::is_integer, double>::type scalbn(__T __x, int __exp) { return std::scalbn((double)__x, __exp); } // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in // ). Thus the ugliness below. #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else namespace std { #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif // Pull the new overloads we defined above into namespace std. 
using ::acos; using ::acosh; using ::asin; using ::asinh; using ::atan; using ::atan2; using ::atanh; using ::cbrt; using ::ceil; using ::copysign; using ::cos; using ::cosh; using ::erf; using ::erfc; using ::exp; using ::exp2; using ::expm1; using ::fabs; using ::fdim; using ::floor; using ::fma; using ::fmax; using ::fmin; using ::fmod; using ::fpclassify; using ::frexp; using ::hypot; using ::ilogb; using ::isfinite; using ::isgreater; using ::isgreaterequal; using ::isless; using ::islessequal; using ::islessgreater; using ::isnormal; using ::isunordered; using ::ldexp; using ::lgamma; using ::llrint; using ::llround; using ::log; using ::log10; using ::log1p; using ::log2; using ::logb; using ::lrint; using ::lround; using ::nearbyint; using ::nextafter; using ::pow; using ::remainder; using ::remquo; using ::rint; using ::round; using ::scalbln; using ::scalbn; using ::signbit; using ::sin; using ::sinh; using ::sqrt; using ::tan; using ::tanh; using ::tgamma; using ::trunc; // Well this is fun: We need to pull these symbols in for libc++, but we can't // pull them in with libstdc++, because its ::isinf and ::isnan are different // than its std::isinf and std::isnan. #ifndef __GLIBCXX__ using ::isinf; using ::isnan; #endif // Finally, pull the "foobarf" functions that CUDA defines in its headers into // namespace std. 
using ::acosf; using ::acoshf; using ::asinf; using ::asinhf; using ::atan2f; using ::atanf; using ::atanhf; using ::cbrtf; using ::ceilf; using ::copysignf; using ::cosf; using ::coshf; using ::erfcf; using ::erff; using ::exp2f; using ::expf; using ::expm1f; using ::fabsf; using ::fdimf; using ::floorf; using ::fmaf; using ::fmaxf; using ::fminf; using ::fmodf; using ::frexpf; using ::hypotf; using ::ilogbf; using ::ldexpf; using ::lgammaf; using ::llrintf; using ::llroundf; using ::log10f; using ::log1pf; using ::log2f; using ::logbf; using ::logf; using ::lrintf; using ::lroundf; using ::modff; using ::nearbyintf; using ::nextafterf; using ::powf; using ::remainderf; using ::remquof; using ::rintf; using ::roundf; using ::scalblnf; using ::scalbnf; using ::sinf; using ::sinhf; using ::sqrtf; using ::tanf; using ::tanhf; using ::tgammaf; using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION #endif } // namespace std #endif #endif // __OPENMP_NVPTX__ #undef __DEVICE__ #endif /builtins/__clang_cuda_complex_builtins.h/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_COMPLEX_BUILTINS #define __CLANG_CUDA_COMPLEX_BUILTINS // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are // libgcc functions that clang assumes are available when compiling c99 complex // operations. (These implementations come from libc++, and have been modified // to work with CUDA and OpenMP target offloading [in C and C++ mode].) 
#pragma push_macro("__DEVICE__") #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) #pragma omp declare target #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) #else #define __DEVICE__ __device__ inline #endif // To make the algorithms available for C and C++ in CUDA and OpenMP we select // different but equivalent function versions. TODO: For OpenMP we currently // select the native builtins as the overload support for templates is lacking. #if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__) #define _ISNANd std::isnan #define _ISNANf std::isnan #define _ISINFd std::isinf #define _ISINFf std::isinf #define _ISFINITEd std::isfinite #define _ISFINITEf std::isfinite #define _COPYSIGNd std::copysign #define _COPYSIGNf std::copysign #define _SCALBNd std::scalbn #define _SCALBNf std::scalbn #define _ABSd std::abs #define _ABSf std::abs #define _LOGBd std::logb #define _LOGBf std::logb // Rather than pulling in std::max from algorithm everytime, use available ::max. 
#define _fmaxd max #define _fmaxf max #else #ifdef __AMDGCN__ #define _ISNANd __ocml_isnan_f64 #define _ISNANf __ocml_isnan_f32 #define _ISINFd __ocml_isinf_f64 #define _ISINFf __ocml_isinf_f32 #define _ISFINITEd __ocml_isfinite_f64 #define _ISFINITEf __ocml_isfinite_f32 #define _COPYSIGNd __ocml_copysign_f64 #define _COPYSIGNf __ocml_copysign_f32 #define _SCALBNd __ocml_scalbn_f64 #define _SCALBNf __ocml_scalbn_f32 #define _ABSd __ocml_fabs_f64 #define _ABSf __ocml_fabs_f32 #define _LOGBd __ocml_logb_f64 #define _LOGBf __ocml_logb_f32 #define _fmaxd __ocml_fmax_f64 #define _fmaxf __ocml_fmax_f32 #else #define _ISNANd __nv_isnand #define _ISNANf __nv_isnanf #define _ISINFd __nv_isinfd #define _ISINFf __nv_isinff #define _ISFINITEd __nv_isfinited #define _ISFINITEf __nv_finitef #define _COPYSIGNd __nv_copysign #define _COPYSIGNf __nv_copysignf #define _SCALBNd __nv_scalbn #define _SCALBNf __nv_scalbnf #define _ABSd __nv_fabs #define _ABSf __nv_fabsf #define _LOGBd __nv_logb #define _LOGBf __nv_logbf #define _fmaxd __nv_fmax #define _fmaxf __nv_fmaxf #endif #endif #if defined(__cplusplus) extern "C" { #endif __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, double __d) { double __ac = __a * __c; double __bd = __b * __d; double __ad = __a * __d; double __bc = __b * __c; double _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { int __recalc = 0; if (_ISINFd(__a) || _ISINFd(__b)) { __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); if (_ISNANd(__c)) __c = _COPYSIGNd(0, __c); if (_ISNANd(__d)) __d = _COPYSIGNd(0, __d); __recalc = 1; } if (_ISINFd(__c) || _ISINFd(__d)) { __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); __d = _COPYSIGNd(_ISINFd(__d) ? 
1 : 0, __d); if (_ISNANd(__a)) __a = _COPYSIGNd(0, __a); if (_ISNANd(__b)) __b = _COPYSIGNd(0, __b); __recalc = 1; } if (!__recalc && (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { if (_ISNANd(__a)) __a = _COPYSIGNd(0, __a); if (_ISNANd(__b)) __b = _COPYSIGNd(0, __b); if (_ISNANd(__c)) __c = _COPYSIGNd(0, __c); if (_ISNANd(__d)) __d = _COPYSIGNd(0, __d); __recalc = 1; } if (__recalc) { // Can't use std::numeric_limits::infinity() -- that doesn't have // a device overload (and isn't constexpr before C++11, naturally). __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); } } return z; } __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { float __ac = __a * __c; float __bd = __b * __d; float __ad = __a * __d; float __bc = __b * __c; float _Complex z; __real__(z) = __ac - __bd; __imag__(z) = __ad + __bc; if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { int __recalc = 0; if (_ISINFf(__a) || _ISINFf(__b)) { __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); if (_ISNANf(__c)) __c = _COPYSIGNf(0, __c); if (_ISNANf(__d)) __d = _COPYSIGNf(0, __d); __recalc = 1; } if (_ISINFf(__c) || _ISINFf(__d)) { __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); if (_ISNANf(__a)) __a = _COPYSIGNf(0, __a); if (_ISNANf(__b)) __b = _COPYSIGNf(0, __b); __recalc = 1; } if (!__recalc && (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { if (_ISNANf(__a)) __a = _COPYSIGNf(0, __a); if (_ISNANf(__b)) __b = _COPYSIGNf(0, __b); if (_ISNANf(__c)) __c = _COPYSIGNf(0, __c); if (_ISNANf(__d)) __d = _COPYSIGNf(0, __d); __recalc = 1; } if (__recalc) { __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); } } return z; } __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, double __d) { int __ilogbw = 0; // Can't use std::max, because that's defined in , and we don't // want to pull that in for every compile. The CUDA headers define // ::max(float, float) and ::max(double, double), which is sufficient for us. double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); if (_ISFINITEd(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNd(__c, -__ilogbw); __d = _SCALBNd(__d, -__ilogbw); } double __denom = __c * __c + __d * __d; double _Complex z; __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && _ISFINITEd(__d)) { __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && _ISFINITEd(__b)) { __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); __d = _COPYSIGNd(_ISINFd(__d) ? 
1.0 : 0.0, __d); __real__(z) = 0.0 * (__a * __c + __b * __d); __imag__(z) = 0.0 * (__b * __c - __a * __d); } } return z; } __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); if (_ISFINITEf(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNf(__c, -__ilogbw); __d = _SCALBNf(__d, -__ilogbw); } float __denom = __c * __c + __d * __d; float _Complex z; __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && _ISFINITEf(__d)) { __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && _ISFINITEf(__b)) { __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); __real__(z) = 0 * (__a * __c + __b * __d); __imag__(z) = 0 * (__b * __c - __a * __d); } } return z; } #if defined(__cplusplus) } // extern "C" #endif #undef _ISNANd #undef _ISNANf #undef _ISINFd #undef _ISINFf #undef _COPYSIGNd #undef _COPYSIGNf #undef _ISFINITEd #undef _ISFINITEf #undef _SCALBNd #undef _SCALBNf #undef _ABSd #undef _ABSf #undef _LOGBd #undef _LOGBf #undef _fmaxd #undef _fmaxf #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) #pragma omp end declare target #endif #pragma pop_macro("__DEVICE__") #endif // __CLANG_CUDA_COMPLEX_BUILTINS /builtins/__clang_cuda_device_functions.h/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ #define __CLANG_CUDA_DEVICE_FUNCTIONS_H__ #ifndef __OPENMP_NVPTX__ #if CUDA_VERSION < 9000 #error This file is intended to be used with CUDA-9+ only. #endif #endif // __DEVICE__ is a helper macro with common set of attributes for the wrappers // we implement in this file. We need static in order to avoid emitting unused // functions and __forceinline__ helps inlining these wrappers at -O1. 
#pragma push_macro("__DEVICE__") #ifdef __OPENMP_NVPTX__ #define __DEVICE__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __forceinline__ #endif __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); } __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); } __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); } __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); } __DEVICE__ unsigned long long __brevll(unsigned long long __a) { return __nv_brevll(__a); } #if defined(__cplusplus) __DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); } __DEVICE__ void __brkpt(int __a) { __brkpt(); } #else __DEVICE__ void __attribute__((overloadable)) __brkpt(void) { __asm__ __volatile__("brkpt;"); } __DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); } #endif __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, unsigned int __c) { return __nv_byte_perm(__a, __b, __c); } __DEVICE__ int __clz(int __a) { return __nv_clz(__a); } __DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); } __DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); } __DEVICE__ double __dAtomicAdd(double *__p, double __v) { return __nvvm_atom_add_gen_d(__p, __v); } __DEVICE__ double __dAtomicAdd_block(double *__p, double __v) { return __nvvm_atom_cta_add_gen_d(__p, __v); } __DEVICE__ double __dAtomicAdd_system(double *__p, double __v) { return __nvvm_atom_sys_add_gen_d(__p, __v); } __DEVICE__ double __dadd_rd(double __a, double __b) { return __nv_dadd_rd(__a, __b); } __DEVICE__ double __dadd_rn(double __a, double __b) { return __nv_dadd_rn(__a, __b); } __DEVICE__ double __dadd_ru(double __a, double __b) { return __nv_dadd_ru(__a, __b); } __DEVICE__ double __dadd_rz(double __a, double __b) { return __nv_dadd_rz(__a, __b); } __DEVICE__ double __ddiv_rd(double __a, double __b) { return __nv_ddiv_rd(__a, __b); } __DEVICE__ double __ddiv_rn(double __a, 
double __b) { return __nv_ddiv_rn(__a, __b); } __DEVICE__ double __ddiv_ru(double __a, double __b) { return __nv_ddiv_ru(__a, __b); } __DEVICE__ double __ddiv_rz(double __a, double __b) { return __nv_ddiv_rz(__a, __b); } __DEVICE__ double __dmul_rd(double __a, double __b) { return __nv_dmul_rd(__a, __b); } __DEVICE__ double __dmul_rn(double __a, double __b) { return __nv_dmul_rn(__a, __b); } __DEVICE__ double __dmul_ru(double __a, double __b) { return __nv_dmul_ru(__a, __b); } __DEVICE__ double __dmul_rz(double __a, double __b) { return __nv_dmul_rz(__a, __b); } __DEVICE__ float __double2float_rd(double __a) { return __nv_double2float_rd(__a); } __DEVICE__ float __double2float_rn(double __a) { return __nv_double2float_rn(__a); } __DEVICE__ float __double2float_ru(double __a) { return __nv_double2float_ru(__a); } __DEVICE__ float __double2float_rz(double __a) { return __nv_double2float_rz(__a); } __DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); } __DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); } __DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); } __DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); } __DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); } __DEVICE__ long long __double2ll_rd(double __a) { return __nv_double2ll_rd(__a); } __DEVICE__ long long __double2ll_rn(double __a) { return __nv_double2ll_rn(__a); } __DEVICE__ long long __double2ll_ru(double __a) { return __nv_double2ll_ru(__a); } __DEVICE__ long long __double2ll_rz(double __a) { return __nv_double2ll_rz(__a); } __DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); } __DEVICE__ unsigned int __double2uint_rd(double __a) { return __nv_double2uint_rd(__a); } __DEVICE__ unsigned int __double2uint_rn(double __a) { return __nv_double2uint_rn(__a); } __DEVICE__ unsigned int __double2uint_ru(double __a) { return __nv_double2uint_ru(__a); } __DEVICE__ 
unsigned int __double2uint_rz(double __a) { return __nv_double2uint_rz(__a); } __DEVICE__ unsigned long long __double2ull_rd(double __a) { return __nv_double2ull_rd(__a); } __DEVICE__ unsigned long long __double2ull_rn(double __a) { return __nv_double2ull_rn(__a); } __DEVICE__ unsigned long long __double2ull_ru(double __a) { return __nv_double2ull_ru(__a); } __DEVICE__ unsigned long long __double2ull_rz(double __a) { return __nv_double2ull_rz(__a); } __DEVICE__ long long __double_as_longlong(double __a) { return __nv_double_as_longlong(__a); } __DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); } __DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); } __DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); } __DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); } __DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); } __DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); } __DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); } __DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); } __DEVICE__ double __dsub_rd(double __a, double __b) { return __nv_dsub_rd(__a, __b); } __DEVICE__ double __dsub_rn(double __a, double __b) { return __nv_dsub_rn(__a, __b); } __DEVICE__ double __dsub_ru(double __a, double __b) { return __nv_dsub_ru(__a, __b); } __DEVICE__ double __dsub_rz(double __a, double __b) { return __nv_dsub_rz(__a, __b); } __DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); } __DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); } __DEVICE__ float __fAtomicAdd(float *__p, float __v) { return __nvvm_atom_add_gen_f(__p, __v); } __DEVICE__ float __fAtomicAdd_block(float *__p, float __v) { return __nvvm_atom_cta_add_gen_f(__p, __v); } __DEVICE__ float __fAtomicAdd_system(float *__p, float __v) { return __nvvm_atom_sys_add_gen_f(__p, __v); } __DEVICE__ float __fAtomicExch(float *__p, float __v) { return __nv_int_as_float( 
__nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); } __DEVICE__ float __fAtomicExch_block(float *__p, float __v) { return __nv_int_as_float( __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); } __DEVICE__ float __fAtomicExch_system(float *__p, float __v) { return __nv_int_as_float( __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v))); } __DEVICE__ float __fadd_rd(float __a, float __b) { return __nv_fadd_rd(__a, __b); } __DEVICE__ float __fadd_rn(float __a, float __b) { return __nv_fadd_rn(__a, __b); } __DEVICE__ float __fadd_ru(float __a, float __b) { return __nv_fadd_ru(__a, __b); } __DEVICE__ float __fadd_rz(float __a, float __b) { return __nv_fadd_rz(__a, __b); } __DEVICE__ float __fdiv_rd(float __a, float __b) { return __nv_fdiv_rd(__a, __b); } __DEVICE__ float __fdiv_rn(float __a, float __b) { return __nv_fdiv_rn(__a, __b); } __DEVICE__ float __fdiv_ru(float __a, float __b) { return __nv_fdiv_ru(__a, __b); } __DEVICE__ float __fdiv_rz(float __a, float __b) { return __nv_fdiv_rz(__a, __b); } __DEVICE__ float __fdividef(float __a, float __b) { return __nv_fast_fdividef(__a, __b); } __DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); } __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); } __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); } __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); } #ifdef _MSC_VER __DEVICE__ int __finitel(long double __a); #endif __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); } __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); } __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); } __DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); } __DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); } __DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); } __DEVICE__ long long __float2ll_ru(float __a) { return 
__nv_float2ll_ru(__a); } __DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); } __DEVICE__ unsigned int __float2uint_rd(float __a) { return __nv_float2uint_rd(__a); } __DEVICE__ unsigned int __float2uint_rn(float __a) { return __nv_float2uint_rn(__a); } __DEVICE__ unsigned int __float2uint_ru(float __a) { return __nv_float2uint_ru(__a); } __DEVICE__ unsigned int __float2uint_rz(float __a) { return __nv_float2uint_rz(__a); } __DEVICE__ unsigned long long __float2ull_rd(float __a) { return __nv_float2ull_rd(__a); } __DEVICE__ unsigned long long __float2ull_rn(float __a) { return __nv_float2ull_rn(__a); } __DEVICE__ unsigned long long __float2ull_ru(float __a) { return __nv_float2ull_ru(__a); } __DEVICE__ unsigned long long __float2ull_rz(float __a) { return __nv_float2ull_rz(__a); } __DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); } __DEVICE__ unsigned int __float_as_uint(float __a) { return __nv_float_as_uint(__a); } __DEVICE__ double __fma_rd(double __a, double __b, double __c) { return __nv_fma_rd(__a, __b, __c); } __DEVICE__ double __fma_rn(double __a, double __b, double __c) { return __nv_fma_rn(__a, __b, __c); } __DEVICE__ double __fma_ru(double __a, double __b, double __c) { return __nv_fma_ru(__a, __b, __c); } __DEVICE__ double __fma_rz(double __a, double __b, double __c) { return __nv_fma_rz(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) { return __nv_fmaf_ieee_rd(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) { return __nv_fmaf_ieee_rn(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) { return __nv_fmaf_ieee_ru(__a, __b, __c); } __DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) { return __nv_fmaf_ieee_rz(__a, __b, __c); } __DEVICE__ float __fmaf_rd(float __a, float __b, float __c) { return __nv_fmaf_rd(__a, __b, __c); } __DEVICE__ float __fmaf_rn(float __a, float __b, float __c) { return 
__nv_fmaf_rn(__a, __b, __c); } __DEVICE__ float __fmaf_ru(float __a, float __b, float __c) { return __nv_fmaf_ru(__a, __b, __c); } __DEVICE__ float __fmaf_rz(float __a, float __b, float __c) { return __nv_fmaf_rz(__a, __b, __c); } __DEVICE__ float __fmul_rd(float __a, float __b) { return __nv_fmul_rd(__a, __b); } __DEVICE__ float __fmul_rn(float __a, float __b) { return __nv_fmul_rn(__a, __b); } __DEVICE__ float __fmul_ru(float __a, float __b) { return __nv_fmul_ru(__a, __b); } __DEVICE__ float __fmul_rz(float __a, float __b) { return __nv_fmul_rz(__a, __b); } __DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); } __DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); } __DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); } __DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); } __DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); } __DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); } __DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); } __DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); } __DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); } __DEVICE__ float __fsub_rd(float __a, float __b) { return __nv_fsub_rd(__a, __b); } __DEVICE__ float __fsub_rn(float __a, float __b) { return __nv_fsub_rn(__a, __b); } __DEVICE__ float __fsub_ru(float __a, float __b) { return __nv_fsub_ru(__a, __b); } __DEVICE__ float __fsub_rz(float __a, float __b) { return __nv_fsub_rz(__a, __b); } __DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); } __DEVICE__ double __hiloint2double(int __a, int __b) { return __nv_hiloint2double(__a, __b); } __DEVICE__ int __iAtomicAdd(int *__p, int __v) { return __nvvm_atom_add_gen_i(__p, __v); } __DEVICE__ int __iAtomicAdd_block(int *__p, int __v) { return __nvvm_atom_cta_add_gen_i(__p, __v); } __DEVICE__ int __iAtomicAdd_system(int *__p, int __v) { return __nvvm_atom_sys_add_gen_i(__p, __v); } __DEVICE__ 
int __iAtomicAnd(int *__p, int __v) { return __nvvm_atom_and_gen_i(__p, __v); } __DEVICE__ int __iAtomicAnd_block(int *__p, int __v) { return __nvvm_atom_cta_and_gen_i(__p, __v); } __DEVICE__ int __iAtomicAnd_system(int *__p, int __v) { return __nvvm_atom_sys_and_gen_i(__p, __v); } __DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) { return __nvvm_atom_cas_gen_i(__p, __cmp, __v); } __DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) { return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v); } __DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) { return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v); } __DEVICE__ int __iAtomicExch(int *__p, int __v) { return __nvvm_atom_xchg_gen_i(__p, __v); } __DEVICE__ int __iAtomicExch_block(int *__p, int __v) { return __nvvm_atom_cta_xchg_gen_i(__p, __v); } __DEVICE__ int __iAtomicExch_system(int *__p, int __v) { return __nvvm_atom_sys_xchg_gen_i(__p, __v); } __DEVICE__ int __iAtomicMax(int *__p, int __v) { return __nvvm_atom_max_gen_i(__p, __v); } __DEVICE__ int __iAtomicMax_block(int *__p, int __v) { return __nvvm_atom_cta_max_gen_i(__p, __v); } __DEVICE__ int __iAtomicMax_system(int *__p, int __v) { return __nvvm_atom_sys_max_gen_i(__p, __v); } __DEVICE__ int __iAtomicMin(int *__p, int __v) { return __nvvm_atom_min_gen_i(__p, __v); } __DEVICE__ int __iAtomicMin_block(int *__p, int __v) { return __nvvm_atom_cta_min_gen_i(__p, __v); } __DEVICE__ int __iAtomicMin_system(int *__p, int __v) { return __nvvm_atom_sys_min_gen_i(__p, __v); } __DEVICE__ int __iAtomicOr(int *__p, int __v) { return __nvvm_atom_or_gen_i(__p, __v); } __DEVICE__ int __iAtomicOr_block(int *__p, int __v) { return __nvvm_atom_cta_or_gen_i(__p, __v); } __DEVICE__ int __iAtomicOr_system(int *__p, int __v) { return __nvvm_atom_sys_or_gen_i(__p, __v); } __DEVICE__ int __iAtomicXor(int *__p, int __v) { return __nvvm_atom_xor_gen_i(__p, __v); } __DEVICE__ int __iAtomicXor_block(int *__p, int __v) { return __nvvm_atom_cta_xor_gen_i(__p, __v); } 
__DEVICE__ int __iAtomicXor_system(int *__p, int __v) { return __nvvm_atom_sys_xor_gen_i(__p, __v); } __DEVICE__ long long __illAtomicMax(long long *__p, long long __v) { return __nvvm_atom_max_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) { return __nvvm_atom_cta_max_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) { return __nvvm_atom_sys_max_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMin(long long *__p, long long __v) { return __nvvm_atom_min_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) { return __nvvm_atom_cta_min_gen_ll(__p, __v); } __DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) { return __nvvm_atom_sys_min_gen_ll(__p, __v); } __DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); } __DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); } __DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); } __DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); } __DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); } __DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); } __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); } __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); } __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); } #ifdef _MSC_VER __DEVICE__ int __isinfl(long double __a); #endif __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); } __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); } #ifdef _MSC_VER __DEVICE__ int __isnanl(long double __a); #endif __DEVICE__ double __ll2double_rd(long long __a) { return __nv_ll2double_rd(__a); } __DEVICE__ double __ll2double_rn(long long __a) { return __nv_ll2double_rn(__a); } __DEVICE__ double __ll2double_ru(long long __a) { return __nv_ll2double_ru(__a); } __DEVICE__ double 
__ll2double_rz(long long __a) { return __nv_ll2double_rz(__a); } __DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); } __DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); } __DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); } __DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); } __DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) { return __nvvm_atom_and_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) { return __nvvm_atom_cta_and_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) { return __nvvm_atom_sys_and_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicOr(long long *__p, long long __v) { return __nvvm_atom_or_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) { return __nvvm_atom_cta_or_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) { return __nvvm_atom_sys_or_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicXor(long long *__p, long long __v) { return __nvvm_atom_xor_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) { return __nvvm_atom_cta_xor_gen_ll(__p, __v); } __DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) { return __nvvm_atom_sys_xor_gen_ll(__p, __v); } __DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); } __DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); } __DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); } __DEVICE__ double __longlong_as_double(long long __a) { return __nv_longlong_as_double(__a); } __DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); } __DEVICE__ long long __mul64hi(long long __a, long long __b) { return __nv_mul64hi(__a, __b); } __DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); } __DEVICE__ 
unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); } __DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); } __DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); } __DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); } __DEVICE__ int __popc(int __a) { return __nv_popc(__a); } __DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); } __DEVICE__ float __powf(float __a, float __b) { return __nv_fast_powf(__a, __b); } // Parameter must have a known integer value. #define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a)) __DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); } __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) { return __nv_sad(__a, __b, __c); } __DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); } __DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); } __DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); } __DEVICE__ void __sincosf(float __a, float *__s, float *__c) { return __nv_fast_sincosf(__a, __s, __c); } __DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); } __DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); } __DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); } __DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); } __DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); } __DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); } __DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); }; __DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); }; __DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); } __DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) { return __nvvm_atom_add_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_add_gen_i((int *)__p, __v); } 
__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_add_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) { return __nvvm_atom_and_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_and_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_and_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp, unsigned int __v) { return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v); } __DEVICE__ unsigned int __uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) { return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v); } __DEVICE__ unsigned int __uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) { return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v); } __DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) { return __nvvm_atom_dec_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_dec_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_dec_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) { return __nvvm_atom_xchg_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) { return __nvvm_atom_inc_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p, unsigned int __v) { return 
__nvvm_atom_cta_inc_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_inc_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) { return __nvvm_atom_max_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_max_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_max_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) { return __nvvm_atom_min_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_min_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_min_gen_ui(__p, __v); } __DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) { return __nvvm_atom_or_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_or_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_or_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) { return __nvvm_atom_xor_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p, unsigned int __v) { return __nvvm_atom_cta_xor_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p, unsigned int __v) { return __nvvm_atom_sys_xor_gen_i((int *)__p, __v); } __DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) { return __nv_uhadd(__a, __b); } __DEVICE__ double __uint2double_rn(unsigned int __a) { return __nv_uint2double_rn(__a); } __DEVICE__ float __uint2float_rd(unsigned int __a) { return __nv_uint2float_rd(__a); } 
__DEVICE__ float __uint2float_rn(unsigned int __a) { return __nv_uint2float_rn(__a); } __DEVICE__ float __uint2float_ru(unsigned int __a) { return __nv_uint2float_ru(__a); } __DEVICE__ float __uint2float_rz(unsigned int __a) { return __nv_uint2float_rz(__a); } __DEVICE__ float __uint_as_float(unsigned int __a) { return __nv_uint_as_float(__a); } // __DEVICE__ double __ull2double_rd(unsigned long long __a) { return __nv_ull2double_rd(__a); } __DEVICE__ double __ull2double_rn(unsigned long long __a) { return __nv_ull2double_rn(__a); } __DEVICE__ double __ull2double_ru(unsigned long long __a) { return __nv_ull2double_ru(__a); } __DEVICE__ double __ull2double_rz(unsigned long long __a) { return __nv_ull2double_rz(__a); } __DEVICE__ float __ull2float_rd(unsigned long long __a) { return __nv_ull2float_rd(__a); } __DEVICE__ float __ull2float_rn(unsigned long long __a) { return __nv_ull2float_rn(__a); } __DEVICE__ float __ull2float_ru(unsigned long long __a) { return __nv_ull2float_ru(__a); } __DEVICE__ float __ull2float_rz(unsigned long long __a) { return __nv_ull2float_rz(__a); } __DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_add_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_and_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p, unsigned long long __v) { return 
__nvvm_atom_sys_and_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v) { return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v); } __DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v) { return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v); } __DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p, unsigned long long __cmp, unsigned long long __v) { return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v); } __DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_xchg_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_max_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_max_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_max_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_min_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_min_gen_ull(__p, __v); } __DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_min_gen_ull(__p, __v); } 
__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_or_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_xor_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p, unsigned long long __v) { return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v); } __DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) { return __nv_umul24(__a, __b); } __DEVICE__ unsigned long long __umul64hi(unsigned long long __a, unsigned long long __b) { return __nv_umul64hi(__a, __b); } __DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) { return __nv_umulhi(__a, __b); } __DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) { return __nv_urhadd(__a, __b); } __DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b, unsigned int __c) { return __nv_usad(__a, __b, __c); } #if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 __DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); } __DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); } __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { return __nv_vabsdiffs2(__a, __b); } __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { return __nv_vabsdiffs4(__a, __b); } __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int 
__b) { return __nv_vabsdiffu2(__a, __b); } __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { return __nv_vabsdiffu4(__a, __b); } __DEVICE__ unsigned int __vabsss2(unsigned int __a) { return __nv_vabsss2(__a); } __DEVICE__ unsigned int __vabsss4(unsigned int __a) { return __nv_vabsss4(__a); } __DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { return __nv_vadd2(__a, __b); } __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { return __nv_vadd4(__a, __b); } __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { return __nv_vaddss2(__a, __b); } __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { return __nv_vaddss4(__a, __b); } __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { return __nv_vaddus2(__a, __b); } __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { return __nv_vaddus4(__a, __b); } __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { return __nv_vavgs2(__a, __b); } __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { return __nv_vavgs4(__a, __b); } __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { return __nv_vavgu2(__a, __b); } __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { return __nv_vavgu4(__a, __b); } __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { return __nv_vcmpeq2(__a, __b); } __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { return __nv_vcmpeq4(__a, __b); } __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { return __nv_vcmpges2(__a, __b); } __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { return __nv_vcmpges4(__a, __b); } __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { return __nv_vcmpgeu2(__a, __b); } __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { return __nv_vcmpgeu4(__a, __b); } 
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { return __nv_vcmpgts2(__a, __b); } __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { return __nv_vcmpgts4(__a, __b); } __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { return __nv_vcmpgtu2(__a, __b); } __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { return __nv_vcmpgtu4(__a, __b); } __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { return __nv_vcmples2(__a, __b); } __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { return __nv_vcmples4(__a, __b); } __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { return __nv_vcmpleu2(__a, __b); } __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { return __nv_vcmpleu4(__a, __b); } __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { return __nv_vcmplts2(__a, __b); } __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { return __nv_vcmplts4(__a, __b); } __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { return __nv_vcmpltu2(__a, __b); } __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { return __nv_vcmpltu4(__a, __b); } __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { return __nv_vcmpne2(__a, __b); } __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { return __nv_vcmpne4(__a, __b); } __DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { return __nv_vhaddu2(__a, __b); } __DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { return __nv_vhaddu4(__a, __b); } __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { return __nv_vmaxs2(__a, __b); } __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { return __nv_vmaxs4(__a, __b); } __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { return 
__nv_vmaxu2(__a, __b); } __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { return __nv_vmaxu4(__a, __b); } __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { return __nv_vmins2(__a, __b); } __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { return __nv_vmins4(__a, __b); } __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { return __nv_vminu2(__a, __b); } __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { return __nv_vminu4(__a, __b); } __DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); } __DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); } __DEVICE__ unsigned int __vnegss2(unsigned int __a) { return __nv_vnegss2(__a); } __DEVICE__ unsigned int __vnegss4(unsigned int __a) { return __nv_vnegss4(__a); } __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { return __nv_vsads2(__a, __b); } __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { return __nv_vsads4(__a, __b); } __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { return __nv_vsadu2(__a, __b); } __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { return __nv_vsadu4(__a, __b); } __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { return __nv_vseteq2(__a, __b); } __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { return __nv_vseteq4(__a, __b); } __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { return __nv_vsetges2(__a, __b); } __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { return __nv_vsetges4(__a, __b); } __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { return __nv_vsetgeu2(__a, __b); } __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { return __nv_vsetgeu4(__a, __b); } __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { return 
__nv_vsetgts2(__a, __b); } __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { return __nv_vsetgts4(__a, __b); } __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { return __nv_vsetgtu2(__a, __b); } __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { return __nv_vsetgtu4(__a, __b); } __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { return __nv_vsetles2(__a, __b); } __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { return __nv_vsetles4(__a, __b); } __DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { return __nv_vsetleu2(__a, __b); } __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { return __nv_vsetleu4(__a, __b); } __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { return __nv_vsetlts2(__a, __b); } __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { return __nv_vsetlts4(__a, __b); } __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { return __nv_vsetltu2(__a, __b); } __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { return __nv_vsetltu4(__a, __b); } __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { return __nv_vsetne2(__a, __b); } __DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { return __nv_vsetne4(__a, __b); } __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { return __nv_vsub2(__a, __b); } __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { return __nv_vsub4(__a, __b); } __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { return __nv_vsubss2(__a, __b); } __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { return __nv_vsubss4(__a, __b); } __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { return __nv_vsubus2(__a, __b); } __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int 
__b) { return __nv_vsubus4(__a, __b); } #else // CUDA_VERSION >= 9020 // CUDA no longer provides inline assembly (or bitcode) implementation of these // functions, so we have to reimplment them. The implementation is naive and is // not optimized for performance. // Helper function to convert N-bit boolean subfields into all-0 or all-1. // E.g. __bool2mask(0x01000100,8) -> 0xff00ff00 // __bool2mask(0x00010000,16) -> 0xffff0000 __DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) { return (__a << shift) - __a; } __DEVICE__ unsigned int __vabs2(unsigned int __a) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabs4(unsigned int __a) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsss2(unsigned int __a) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabsss4(unsigned int __a) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vadd2(unsigned int 
__a, unsigned int __b) { unsigned int r; __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int 
__vcmpeq2(unsigned int __a, unsigned int __b) { return __bool2mask(__vseteq2(__a, __b), 16); } __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { return __bool2mask(__vseteq4(__a, __b), 8); } __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetges2(__a, __b), 16); } __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetges4(__a, __b), 8); } __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgeu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgeu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgts2(__a, __b), 16); } __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { unsigned int r; 
__asm__("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgts4(__a, __b), 8); } __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgtu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetgtu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetles2(__a, __b), 16); } __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetles4(__a, __b), 8); } __DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetleu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { return 
__bool2mask(__vsetleu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetlts2(__a, __b), 16); } __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetlts4(__a, __b), 8); } __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetltu2(__a, __b), 16); } __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetltu4(__a, __b), 8); } __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetne2(__a, __b), 16); } __DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { return __bool2mask(__vsetne4(__a, __b), 8); } // Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086 // (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) => // (a + b) / 2 = ((a 
^ b) >> 1) + (a & b) // To operate on multiple sub-elements we need to make sure to mask out bits // that crossed over into adjacent elements during the shift. __DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) { return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b); } __DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) { return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b); } __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { unsigned int r; if ((__a & 0x8000) && (__b & 0x8000)) { // Work around a bug in ptxas which produces invalid result if low element // is negative. unsigned mask = __vcmpgts2(__a, __b); r = (__a & mask) | (__b & ~mask); } else { __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); } return r; } __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { unsigned int r; 
__asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); } __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); } __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vnegss2(unsigned int __a) { return __vsubss2(0, __a); } __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vnegss4(unsigned int __a) { return __vsubss4(0, __a); } __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { unsigned int r; 
__asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { unsigned int r; __asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); return r; } #endif // CUDA_VERSION >= 9020 // For OpenMP we require the user to include as we need to know what // clock_t is on the system. #ifndef __OPENMP_NVPTX__ __DEVICE__ /* clock_t= */ int clock() { return __nvvm_read_ptx_sreg_clock(); } #endif __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); } // These functions shouldn't be declared when including this header // for math function resolution purposes. #ifndef __OPENMP_NVPTX__ __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) { return __builtin_memcpy(__a, __b, __c); } __DEVICE__ void *memset(void *__a, int __b, size_t __c) { return __builtin_memset(__a, __b, __c); } #endif #pragma pop_macro("__DEVICE__") #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ /builtins/__clang_cuda_intrinsics.h/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_INTRINSICS_H__ #define __CLANG_CUDA_INTRINSICS_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif // sm_30 intrinsics: __shfl_{up,down,xor}. 
#define __SM_30_INTRINSICS_H__ #define __SM_30_INTRINSICS_HPP__ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 #pragma push_macro("__MAKE_SHUFFLES") #define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \ __Type) \ inline __device__ int __FnName(int __val, __Type __offset, \ int __width = warpSize) { \ return __IntIntrinsic(__val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ float __FnName(float __val, __Type __offset, \ int __width = warpSize) { \ return __FloatIntrinsic(__val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ inline __device__ long long __FnName(long long __val, __Type __offset, \ int __width = warpSize) { \ struct __Bits { \ int __a, __b; \ }; \ _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \ long long __ret; \ memcpy(&__ret, &__tmp, sizeof(__tmp)); \ return __ret; \ } \ inline __device__ long __FnName(long __val, __Type __offset, \ int __width = warpSize) { \ _Static_assert(sizeof(long) == sizeof(long long) || \ sizeof(long) == sizeof(int)); \ if (sizeof(long) == sizeof(long long)) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } else if (sizeof(long) == sizeof(int)) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ } \ inline __device__ unsigned long __FnName( \ unsigned long __val, __Type __offset, int __width = warpSize) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ inline __device__ unsigned long long __FnName( \ unsigned long long __val, __Type __offset, int __width 
= warpSize) { \ return static_cast( \ ::__FnName(static_cast(__val), __offset, __width)); \ } \ inline __device__ double __FnName(double __val, __Type __offset, \ int __width = warpSize) { \ long long __tmp; \ _Static_assert(sizeof(__tmp) == sizeof(__val)); \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp = ::__FnName(__tmp, __offset, __width); \ double __ret; \ memcpy(&__ret, &__tmp, sizeof(__ret)); \ return __ret; \ } __MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int); // We use 0 rather than 31 as our mask, because shfl.up applies to lanes >= // maxLane. __MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0, unsigned int); __MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f, unsigned int); __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f, int); #pragma pop_macro("__MAKE_SHUFFLES") #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 #if CUDA_VERSION >= 9000 #if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300) // __shfl_sync_* variants available in CUDA-9 #pragma push_macro("__MAKE_SYNC_SHUFFLES") #define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \ __Mask, __Type) \ inline __device__ int __FnName(unsigned int __mask, int __val, \ __Type __offset, int __width = warpSize) { \ return __IntIntrinsic(__mask, __val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ float __FnName(unsigned int __mask, float __val, \ __Type __offset, int __width = warpSize) { \ return __FloatIntrinsic(__mask, __val, __offset, \ ((warpSize - __width) << 8) | (__Mask)); \ } \ inline __device__ unsigned int __FnName(unsigned int __mask, \ unsigned int __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ inline __device__ long long __FnName(unsigned int __mask, long long __val, \ __Type __offset, \ int __width = warpSize) { \ struct __Bits { \ int __a, 
__b; \ }; \ _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \ long long __ret; \ memcpy(&__ret, &__tmp, sizeof(__tmp)); \ return __ret; \ } \ inline __device__ unsigned long long __FnName( \ unsigned int __mask, unsigned long long __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ inline __device__ long __FnName(unsigned int __mask, long __val, \ __Type __offset, int __width = warpSize) { \ _Static_assert(sizeof(long) == sizeof(long long) || \ sizeof(long) == sizeof(int)); \ if (sizeof(long) == sizeof(long long)) { \ return static_cast(::__FnName( \ __mask, static_cast(__val), __offset, __width)); \ } else if (sizeof(long) == sizeof(int)) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ } \ inline __device__ unsigned long __FnName( \ unsigned int __mask, unsigned long __val, __Type __offset, \ int __width = warpSize) { \ return static_cast( \ ::__FnName(__mask, static_cast(__val), __offset, __width)); \ } \ inline __device__ double __FnName(unsigned int __mask, double __val, \ __Type __offset, int __width = warpSize) { \ long long __tmp; \ _Static_assert(sizeof(__tmp) == sizeof(__val)); \ memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp = ::__FnName(__mask, __tmp, __offset, __width); \ double __ret; \ memcpy(&__ret, &__tmp, sizeof(__ret)); \ return __ret; \ } __MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32, __nvvm_shfl_sync_idx_f32, 0x1f, int); // We use 0 rather than 31 as our mask, because shfl.up applies to lanes >= // maxLane. 
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32, __nvvm_shfl_sync_up_f32, 0, unsigned int); __MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32, __nvvm_shfl_sync_down_f32, 0x1f, unsigned int); __MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32, __nvvm_shfl_sync_bfly_f32, 0x1f, int); #pragma pop_macro("__MAKE_SYNC_SHUFFLES") inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) { return __nvvm_bar_warp_sync(mask); } inline __device__ void __barrier_sync(unsigned int id) { __nvvm_barrier_sync(id); } inline __device__ void __barrier_sync_count(unsigned int id, unsigned int count) { __nvvm_barrier_sync_cnt(id, count); } inline __device__ int __all_sync(unsigned int mask, int pred) { return __nvvm_vote_all_sync(mask, pred); } inline __device__ int __any_sync(unsigned int mask, int pred) { return __nvvm_vote_any_sync(mask, pred); } inline __device__ int __uni_sync(unsigned int mask, int pred) { return __nvvm_vote_uni_sync(mask, pred); } inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) { return __nvvm_vote_ballot_sync(mask, pred); } inline __device__ unsigned int __activemask() { #if CUDA_VERSION < 9020 return __nvvm_vote_ballot(1); #else unsigned int mask; asm volatile("activemask.b32 %0;" : "=r"(mask)); return mask; #endif } inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) { return __nvvm_fns(mask, base, offset); } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 // Define __match* builtins CUDA-9 headers expect to see. 
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 inline __device__ unsigned int __match32_any_sync(unsigned int mask, unsigned int value) { return __nvvm_match_any_sync_i32(mask, value); } inline __device__ unsigned int __match64_any_sync(unsigned int mask, unsigned long long value) { return __nvvm_match_any_sync_i64(mask, value); } inline __device__ unsigned int __match32_all_sync(unsigned int mask, unsigned int value, int *pred) { return __nvvm_match_all_sync_i32p(mask, value, pred); } inline __device__ unsigned int __match64_all_sync(unsigned int mask, unsigned long long value, int *pred) { return __nvvm_match_all_sync_i64p(mask, value, pred); } #include "crt/sm_70_rt.hpp" #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 #endif // __CUDA_VERSION >= 9000 // sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}. // Prevent the vanilla sm_32 intrinsics header from being included. #define __SM_32_INTRINSICS_H__ #define __SM_32_INTRINSICS_HPP__ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); } inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); } inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); } inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); } inline __device__ long long __ldg(const long long *ptr) { return __nvvm_ldg_ll(ptr); } inline __device__ unsigned char __ldg(const unsigned char *ptr) { return __nvvm_ldg_uc(ptr); } inline __device__ signed char __ldg(const signed char *ptr) { return __nvvm_ldg_uc((const unsigned char *)ptr); } inline __device__ unsigned short __ldg(const unsigned short *ptr) { return __nvvm_ldg_us(ptr); } inline __device__ unsigned int __ldg(const unsigned int *ptr) { return __nvvm_ldg_ui(ptr); } inline __device__ unsigned long __ldg(const unsigned long *ptr) { return __nvvm_ldg_ul(ptr); } inline __device__ unsigned long long __ldg(const unsigned long long *ptr) { return 
__nvvm_ldg_ull(ptr); } inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); } inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); } inline __device__ char2 __ldg(const char2 *ptr) { typedef char c2 __attribute__((ext_vector_type(2))); // We can assume that ptr is aligned at least to char2's alignment, but the // load will assume that ptr is aligned to char2's alignment. This is only // safe if alignof(c2) <= alignof(char2). c2 rv = __nvvm_ldg_c2(reinterpret_cast(ptr)); char2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ char4 __ldg(const char4 *ptr) { typedef char c4 __attribute__((ext_vector_type(4))); c4 rv = __nvvm_ldg_c4(reinterpret_cast(ptr)); char4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ short2 __ldg(const short2 *ptr) { typedef short s2 __attribute__((ext_vector_type(2))); s2 rv = __nvvm_ldg_s2(reinterpret_cast(ptr)); short2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ short4 __ldg(const short4 *ptr) { typedef short s4 __attribute__((ext_vector_type(4))); s4 rv = __nvvm_ldg_s4(reinterpret_cast(ptr)); short4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ int2 __ldg(const int2 *ptr) { typedef int i2 __attribute__((ext_vector_type(2))); i2 rv = __nvvm_ldg_i2(reinterpret_cast(ptr)); int2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ int4 __ldg(const int4 *ptr) { typedef int i4 __attribute__((ext_vector_type(4))); i4 rv = __nvvm_ldg_i4(reinterpret_cast(ptr)); int4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ longlong2 __ldg(const longlong2 *ptr) { typedef long long ll2 __attribute__((ext_vector_type(2))); ll2 rv = __nvvm_ldg_ll2(reinterpret_cast(ptr)); longlong2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ uchar2 __ldg(const uchar2 *ptr) { typedef unsigned char uc2 
__attribute__((ext_vector_type(2))); uc2 rv = __nvvm_ldg_uc2(reinterpret_cast(ptr)); uchar2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ uchar4 __ldg(const uchar4 *ptr) { typedef unsigned char uc4 __attribute__((ext_vector_type(4))); uc4 rv = __nvvm_ldg_uc4(reinterpret_cast(ptr)); uchar4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ ushort2 __ldg(const ushort2 *ptr) { typedef unsigned short us2 __attribute__((ext_vector_type(2))); us2 rv = __nvvm_ldg_us2(reinterpret_cast(ptr)); ushort2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ ushort4 __ldg(const ushort4 *ptr) { typedef unsigned short us4 __attribute__((ext_vector_type(4))); us4 rv = __nvvm_ldg_us4(reinterpret_cast(ptr)); ushort4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ uint2 __ldg(const uint2 *ptr) { typedef unsigned int ui2 __attribute__((ext_vector_type(2))); ui2 rv = __nvvm_ldg_ui2(reinterpret_cast(ptr)); uint2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ uint4 __ldg(const uint4 *ptr) { typedef unsigned int ui4 __attribute__((ext_vector_type(4))); ui4 rv = __nvvm_ldg_ui4(reinterpret_cast(ptr)); uint4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; return ret; } inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) { typedef unsigned long long ull2 __attribute__((ext_vector_type(2))); ull2 rv = __nvvm_ldg_ull2(reinterpret_cast(ptr)); ulonglong2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ float2 __ldg(const float2 *ptr) { typedef float f2 __attribute__((ext_vector_type(2))); f2 rv = __nvvm_ldg_f2(reinterpret_cast(ptr)); float2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } inline __device__ float4 __ldg(const float4 *ptr) { typedef float f4 __attribute__((ext_vector_type(4))); f4 rv = __nvvm_ldg_f4(reinterpret_cast(ptr)); float4 ret; ret.x = rv[0]; ret.y = rv[1]; ret.z = rv[2]; ret.w = rv[3]; 
return ret; } inline __device__ double2 __ldg(const double2 *ptr) { typedef double d2 __attribute__((ext_vector_type(2))); d2 rv = __nvvm_ldg_d2(reinterpret_cast(ptr)); double2 ret; ret.x = rv[0]; ret.y = rv[1]; return ret; } // TODO: Implement these as intrinsics, so the backend can work its magic on // these. Alternatively, we could implement these as plain C and try to get // llvm to recognize the relevant patterns. inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned result; asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(low32), "r"(high32), "r"(shiftWidth)); return result; } inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned result; asm("shf.l.clamp.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(low32), "r"(high32), "r"(shiftWidth)); return result; } inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned result; asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result) : "r"(low32), "r"(high32), "r"(shiftWidth)); return result; } inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, unsigned shiftWidth) { unsigned ret; asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(low32), "r"(high32), "r"(shiftWidth)); return ret; } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 #if CUDA_VERSION >= 11000 extern "C" { __device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(1))) *)__ptr; } __device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(3))) *)__ptr; } __device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) { return (size_t)(void __attribute__((address_space(4))) *)__ptr; } __device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) { return (size_t)(void 
__attribute__((address_space(5))) *)__ptr; } __device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(1))) *)__ptr; } __device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(3))) *)__ptr; } __device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(4))) *)__ptr; } __device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) { return (void *)(void __attribute__((address_space(5))) *)__ptr; } __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) { return __nv_cvta_generic_to_shared_impl(__ptr); } } // extern "C" #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 __device__ inline unsigned __reduce_add_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_add(__mask, __value); } __device__ inline unsigned __reduce_min_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_umin(__mask, __value); } __device__ inline unsigned __reduce_max_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_umax(__mask, __value); } __device__ inline int __reduce_min_sync(unsigned __mask, int __value) { return __nvvm_redux_sync_min(__mask, __value); } __device__ inline int __reduce_max_sync(unsigned __mask, int __value) { return __nvvm_redux_sync_max(__mask, __value); } __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_or(__mask, __value); } __device__ inline unsigned __reduce_and_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_and(__mask, __value); } __device__ inline unsigned __reduce_xor_sync(unsigned __mask, unsigned __value) { return __nvvm_redux_sync_xor(__mask, __value); } __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst, const void *__src, unsigned __src_size) { __nvvm_cp_async_ca_shared_global_4( (void 
__attribute__((address_space(3))) *)__dst, (const void __attribute__((address_space(1))) *)__src, __src_size); } __device__ inline void __nv_memcpy_async_shared_global_8(void *__dst, const void *__src, unsigned __src_size) { __nvvm_cp_async_ca_shared_global_8( (void __attribute__((address_space(3))) *)__dst, (const void __attribute__((address_space(1))) *)__src, __src_size); } __device__ inline void __nv_memcpy_async_shared_global_16(void *__dst, const void *__src, unsigned __src_size) { __nvvm_cp_async_ca_shared_global_16( (void __attribute__((address_space(3))) *)__dst, (const void __attribute__((address_space(1))) *)__src, __src_size); } __device__ inline void * __nv_associate_access_property(const void *__ptr, unsigned long long __prop) { // TODO: it appears to provide compiler with some sort of a hint. We do not // know what exactly it is supposed to do. However, CUDA headers suggest that // just passing through __ptr should not affect correctness. They do so on // pre-sm80 GPUs where this builtin is not available. 
return (void*)__ptr; } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 900 __device__ inline unsigned __isCtaShared(const void *ptr) { return __isShared(ptr); } __device__ inline unsigned __isClusterShared(const void *__ptr) { return __nvvm_isspacep_shared_cluster(__ptr); } __device__ inline void *__cluster_map_shared_rank(const void *__ptr, unsigned __rank) { return __nvvm_mapa((void *)__ptr, __rank); } __device__ inline unsigned __cluster_query_shared_rank(const void *__ptr) { return __nvvm_getctarank((void *)__ptr); } __device__ inline uint2 __cluster_map_shared_multicast(const void *__ptr, unsigned int __cluster_cta_mask) { return make_uint2((unsigned)__cvta_generic_to_shared(__ptr), __cluster_cta_mask); } __device__ inline unsigned __clusterDimIsSpecified() { return __nvvm_is_explicit_cluster(); } __device__ inline dim3 __clusterDim() { return dim3(__nvvm_read_ptx_sreg_cluster_nctaid_x(), __nvvm_read_ptx_sreg_cluster_nctaid_y(), __nvvm_read_ptx_sreg_cluster_nctaid_z()); } __device__ inline dim3 __clusterRelativeBlockIdx() { return dim3(__nvvm_read_ptx_sreg_cluster_ctaid_x(), __nvvm_read_ptx_sreg_cluster_ctaid_y(), __nvvm_read_ptx_sreg_cluster_ctaid_z()); } __device__ inline dim3 __clusterGridDimInClusters() { return dim3(__nvvm_read_ptx_sreg_nclusterid_x(), __nvvm_read_ptx_sreg_nclusterid_y(), __nvvm_read_ptx_sreg_nclusterid_z()); } __device__ inline dim3 __clusterIdx() { return dim3(__nvvm_read_ptx_sreg_clusterid_x(), __nvvm_read_ptx_sreg_clusterid_y(), __nvvm_read_ptx_sreg_clusterid_z()); } __device__ inline unsigned __clusterRelativeBlockRank() { return __nvvm_read_ptx_sreg_cluster_ctarank(); } __device__ inline unsigned __clusterSizeInBlocks() { return __nvvm_read_ptx_sreg_cluster_nctarank(); } __device__ inline void __cluster_barrier_arrive() { __nvvm_barrier_cluster_arrive(); } __device__ inline void __cluster_barrier_arrive_relaxed() { __nvvm_barrier_cluster_arrive_relaxed(); } __device__ inline 
void __cluster_barrier_wait() { __nvvm_barrier_cluster_wait(); } __device__ inline void __threadfence_cluster() { __nvvm_fence_sc_cluster(); } __device__ inline float2 atomicAdd(float2 *__ptr, float2 __val) { float2 __ret; __asm__("atom.add.v2.f32 {%0, %1}, [%2], {%3, %4};" : "=f"(__ret.x), "=f"(__ret.y) : "l"(__ptr), "f"(__val.x), "f"(__val.y)); return __ret; } __device__ inline float2 atomicAdd_block(float2 *__ptr, float2 __val) { float2 __ret; __asm__("atom.cta.add.v2.f32 {%0, %1}, [%2], {%3, %4};" : "=f"(__ret.x), "=f"(__ret.y) : "l"(__ptr), "f"(__val.x), "f"(__val.y)); return __ret; } __device__ inline float2 atomicAdd_system(float2 *__ptr, float2 __val) { float2 __ret; __asm__("atom.sys.add.v2.f32 {%0, %1}, [%2], {%3, %4};" : "=f"(__ret.x), "=f"(__ret.y) : "l"(__ptr), "f"(__val.x), "f"(__val.y)); return __ret; } __device__ inline float4 atomicAdd(float4 *__ptr, float4 __val) { float4 __ret; __asm__("atom.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};" : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w) : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w)); return __ret; } __device__ inline float4 atomicAdd_block(float4 *__ptr, float4 __val) { float4 __ret; __asm__( "atom.cta.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};" : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w) : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w)); return __ret; } __device__ inline float4 atomicAdd_system(float4 *__ptr, float4 __val) { float4 __ret; __asm__( "atom.sys.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};" : "=f"(__ret.x), "=f"(__ret.y), "=f"(__ret.z), "=f"(__ret.w) : "l"(__ptr), "f"(__val.x), "f"(__val.y), "f"(__val.z), "f"(__val.w) :); return __ret; } #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 900 #endif // CUDA_VERSION >= 11000 #endif // defined(__CLANG_CUDA_INTRINSICS_H__) /builtins/__clang_cuda_libdevice_declares.h/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions 
--=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__ #define __CLANG_CUDA_LIBDEVICE_DECLARES_H__ #if defined(__cplusplus) extern "C" { #endif #if defined(__OPENMP_NVPTX__) #define __DEVICE__ #pragma omp begin assumes ext_spmd_amenable no_openmp #elif defined(__CUDA__) #define __DEVICE__ __device__ #endif __DEVICE__ int __nv_abs(int __a); __DEVICE__ double __nv_acos(double __a); __DEVICE__ float __nv_acosf(float __a); __DEVICE__ double __nv_acosh(double __a); __DEVICE__ float __nv_acoshf(float __a); __DEVICE__ double __nv_asin(double __a); __DEVICE__ float __nv_asinf(float __a); __DEVICE__ double __nv_asinh(double __a); __DEVICE__ float __nv_asinhf(float __a); __DEVICE__ double __nv_atan2(double __a, double __b); __DEVICE__ float __nv_atan2f(float __a, float __b); __DEVICE__ double __nv_atan(double __a); __DEVICE__ float __nv_atanf(float __a); __DEVICE__ double __nv_atanh(double __a); __DEVICE__ float __nv_atanhf(float __a); __DEVICE__ int __nv_brev(int __a); __DEVICE__ long long __nv_brevll(long long __a); __DEVICE__ int __nv_byte_perm(int __a, int __b, int __c); __DEVICE__ double __nv_cbrt(double __a); __DEVICE__ float __nv_cbrtf(float __a); __DEVICE__ double __nv_ceil(double __a); __DEVICE__ float __nv_ceilf(float __a); __DEVICE__ int __nv_clz(int __a); __DEVICE__ int __nv_clzll(long long __a); __DEVICE__ double __nv_copysign(double __a, double __b); __DEVICE__ float __nv_copysignf(float __a, float __b); __DEVICE__ double __nv_cos(double __a); __DEVICE__ float __nv_cosf(float __a); __DEVICE__ double __nv_cosh(double __a); __DEVICE__ float __nv_coshf(float __a); __DEVICE__ double __nv_cospi(double __a); __DEVICE__ float __nv_cospif(float __a); __DEVICE__ double __nv_cyl_bessel_i0(double 
__a); __DEVICE__ float __nv_cyl_bessel_i0f(float __a); __DEVICE__ double __nv_cyl_bessel_i1(double __a); __DEVICE__ float __nv_cyl_bessel_i1f(float __a); __DEVICE__ double __nv_dadd_rd(double __a, double __b); __DEVICE__ double __nv_dadd_rn(double __a, double __b); __DEVICE__ double __nv_dadd_ru(double __a, double __b); __DEVICE__ double __nv_dadd_rz(double __a, double __b); __DEVICE__ double __nv_ddiv_rd(double __a, double __b); __DEVICE__ double __nv_ddiv_rn(double __a, double __b); __DEVICE__ double __nv_ddiv_ru(double __a, double __b); __DEVICE__ double __nv_ddiv_rz(double __a, double __b); __DEVICE__ double __nv_dmul_rd(double __a, double __b); __DEVICE__ double __nv_dmul_rn(double __a, double __b); __DEVICE__ double __nv_dmul_ru(double __a, double __b); __DEVICE__ double __nv_dmul_rz(double __a, double __b); __DEVICE__ float __nv_double2float_rd(double __a); __DEVICE__ float __nv_double2float_rn(double __a); __DEVICE__ float __nv_double2float_ru(double __a); __DEVICE__ float __nv_double2float_rz(double __a); __DEVICE__ int __nv_double2hiint(double __a); __DEVICE__ int __nv_double2int_rd(double __a); __DEVICE__ int __nv_double2int_rn(double __a); __DEVICE__ int __nv_double2int_ru(double __a); __DEVICE__ int __nv_double2int_rz(double __a); __DEVICE__ long long __nv_double2ll_rd(double __a); __DEVICE__ long long __nv_double2ll_rn(double __a); __DEVICE__ long long __nv_double2ll_ru(double __a); __DEVICE__ long long __nv_double2ll_rz(double __a); __DEVICE__ int __nv_double2loint(double __a); __DEVICE__ unsigned int __nv_double2uint_rd(double __a); __DEVICE__ unsigned int __nv_double2uint_rn(double __a); __DEVICE__ unsigned int __nv_double2uint_ru(double __a); __DEVICE__ unsigned int __nv_double2uint_rz(double __a); __DEVICE__ unsigned long long __nv_double2ull_rd(double __a); __DEVICE__ unsigned long long __nv_double2ull_rn(double __a); __DEVICE__ unsigned long long __nv_double2ull_ru(double __a); __DEVICE__ unsigned long long __nv_double2ull_rz(double __a); 
__DEVICE__ unsigned long long __nv_double_as_longlong(double __a); __DEVICE__ double __nv_drcp_rd(double __a); __DEVICE__ double __nv_drcp_rn(double __a); __DEVICE__ double __nv_drcp_ru(double __a); __DEVICE__ double __nv_drcp_rz(double __a); __DEVICE__ double __nv_dsqrt_rd(double __a); __DEVICE__ double __nv_dsqrt_rn(double __a); __DEVICE__ double __nv_dsqrt_ru(double __a); __DEVICE__ double __nv_dsqrt_rz(double __a); __DEVICE__ double __nv_dsub_rd(double __a, double __b); __DEVICE__ double __nv_dsub_rn(double __a, double __b); __DEVICE__ double __nv_dsub_ru(double __a, double __b); __DEVICE__ double __nv_dsub_rz(double __a, double __b); __DEVICE__ double __nv_erfc(double __a); __DEVICE__ float __nv_erfcf(float __a); __DEVICE__ double __nv_erfcinv(double __a); __DEVICE__ float __nv_erfcinvf(float __a); __DEVICE__ double __nv_erfcx(double __a); __DEVICE__ float __nv_erfcxf(float __a); __DEVICE__ double __nv_erf(double __a); __DEVICE__ float __nv_erff(float __a); __DEVICE__ double __nv_erfinv(double __a); __DEVICE__ float __nv_erfinvf(float __a); __DEVICE__ double __nv_exp10(double __a); __DEVICE__ float __nv_exp10f(float __a); __DEVICE__ double __nv_exp2(double __a); __DEVICE__ float __nv_exp2f(float __a); __DEVICE__ double __nv_exp(double __a); __DEVICE__ float __nv_expf(float __a); __DEVICE__ double __nv_expm1(double __a); __DEVICE__ float __nv_expm1f(float __a); __DEVICE__ double __nv_fabs(double __a); __DEVICE__ float __nv_fabsf(float __a); __DEVICE__ float __nv_fadd_rd(float __a, float __b); __DEVICE__ float __nv_fadd_rn(float __a, float __b); __DEVICE__ float __nv_fadd_ru(float __a, float __b); __DEVICE__ float __nv_fadd_rz(float __a, float __b); __DEVICE__ float __nv_fast_cosf(float __a); __DEVICE__ float __nv_fast_exp10f(float __a); __DEVICE__ float __nv_fast_expf(float __a); __DEVICE__ float __nv_fast_fdividef(float __a, float __b); __DEVICE__ float __nv_fast_log10f(float __a); __DEVICE__ float __nv_fast_log2f(float __a); __DEVICE__ float 
__nv_fast_logf(float __a); __DEVICE__ float __nv_fast_powf(float __a, float __b); __DEVICE__ void __nv_fast_sincosf(float __a, float *__s, float *__c); __DEVICE__ float __nv_fast_sinf(float __a); __DEVICE__ float __nv_fast_tanf(float __a); __DEVICE__ double __nv_fdim(double __a, double __b); __DEVICE__ float __nv_fdimf(float __a, float __b); __DEVICE__ float __nv_fdiv_rd(float __a, float __b); __DEVICE__ float __nv_fdiv_rn(float __a, float __b); __DEVICE__ float __nv_fdiv_ru(float __a, float __b); __DEVICE__ float __nv_fdiv_rz(float __a, float __b); __DEVICE__ int __nv_ffs(int __a); __DEVICE__ int __nv_ffsll(long long __a); __DEVICE__ int __nv_finitef(float __a); __DEVICE__ unsigned short __nv_float2half_rn(float __a); __DEVICE__ int __nv_float2int_rd(float __a); __DEVICE__ int __nv_float2int_rn(float __a); __DEVICE__ int __nv_float2int_ru(float __a); __DEVICE__ int __nv_float2int_rz(float __a); __DEVICE__ long long __nv_float2ll_rd(float __a); __DEVICE__ long long __nv_float2ll_rn(float __a); __DEVICE__ long long __nv_float2ll_ru(float __a); __DEVICE__ long long __nv_float2ll_rz(float __a); __DEVICE__ unsigned int __nv_float2uint_rd(float __a); __DEVICE__ unsigned int __nv_float2uint_rn(float __a); __DEVICE__ unsigned int __nv_float2uint_ru(float __a); __DEVICE__ unsigned int __nv_float2uint_rz(float __a); __DEVICE__ unsigned long long __nv_float2ull_rd(float __a); __DEVICE__ unsigned long long __nv_float2ull_rn(float __a); __DEVICE__ unsigned long long __nv_float2ull_ru(float __a); __DEVICE__ unsigned long long __nv_float2ull_rz(float __a); __DEVICE__ int __nv_float_as_int(float __a); __DEVICE__ unsigned int __nv_float_as_uint(float __a); __DEVICE__ double __nv_floor(double __a); __DEVICE__ float __nv_floorf(float __a); __DEVICE__ double __nv_fma(double __a, double __b, double __c); __DEVICE__ float __nv_fmaf(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_rn(float __a, float 
__b, float __c); __DEVICE__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_rd(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_rn(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_ru(float __a, float __b, float __c); __DEVICE__ float __nv_fmaf_rz(float __a, float __b, float __c); __DEVICE__ double __nv_fma_rd(double __a, double __b, double __c); __DEVICE__ double __nv_fma_rn(double __a, double __b, double __c); __DEVICE__ double __nv_fma_ru(double __a, double __b, double __c); __DEVICE__ double __nv_fma_rz(double __a, double __b, double __c); __DEVICE__ double __nv_fmax(double __a, double __b); __DEVICE__ float __nv_fmaxf(float __a, float __b); __DEVICE__ double __nv_fmin(double __a, double __b); __DEVICE__ float __nv_fminf(float __a, float __b); __DEVICE__ double __nv_fmod(double __a, double __b); __DEVICE__ float __nv_fmodf(float __a, float __b); __DEVICE__ float __nv_fmul_rd(float __a, float __b); __DEVICE__ float __nv_fmul_rn(float __a, float __b); __DEVICE__ float __nv_fmul_ru(float __a, float __b); __DEVICE__ float __nv_fmul_rz(float __a, float __b); __DEVICE__ float __nv_frcp_rd(float __a); __DEVICE__ float __nv_frcp_rn(float __a); __DEVICE__ float __nv_frcp_ru(float __a); __DEVICE__ float __nv_frcp_rz(float __a); __DEVICE__ double __nv_frexp(double __a, int *__b); __DEVICE__ float __nv_frexpf(float __a, int *__b); __DEVICE__ float __nv_frsqrt_rn(float __a); __DEVICE__ float __nv_fsqrt_rd(float __a); __DEVICE__ float __nv_fsqrt_rn(float __a); __DEVICE__ float __nv_fsqrt_ru(float __a); __DEVICE__ float __nv_fsqrt_rz(float __a); __DEVICE__ float __nv_fsub_rd(float __a, float __b); __DEVICE__ float __nv_fsub_rn(float __a, float __b); __DEVICE__ float __nv_fsub_ru(float __a, float __b); __DEVICE__ float __nv_fsub_rz(float __a, float __b); __DEVICE__ int __nv_hadd(int __a, int __b); __DEVICE__ float __nv_half2float(unsigned short __h); 
__DEVICE__ double __nv_hiloint2double(int __a, int __b); __DEVICE__ double __nv_hypot(double __a, double __b); __DEVICE__ float __nv_hypotf(float __a, float __b); __DEVICE__ int __nv_ilogb(double __a); __DEVICE__ int __nv_ilogbf(float __a); __DEVICE__ double __nv_int2double_rn(int __a); __DEVICE__ float __nv_int2float_rd(int __a); __DEVICE__ float __nv_int2float_rn(int __a); __DEVICE__ float __nv_int2float_ru(int __a); __DEVICE__ float __nv_int2float_rz(int __a); __DEVICE__ float __nv_int_as_float(int __a); __DEVICE__ int __nv_isfinited(double __a); __DEVICE__ int __nv_isinfd(double __a); __DEVICE__ int __nv_isinff(float __a); __DEVICE__ int __nv_isnand(double __a); __DEVICE__ int __nv_isnanf(float __a); __DEVICE__ double __nv_j0(double __a); __DEVICE__ float __nv_j0f(float __a); __DEVICE__ double __nv_j1(double __a); __DEVICE__ float __nv_j1f(float __a); __DEVICE__ float __nv_jnf(int __a, float __b); __DEVICE__ double __nv_jn(int __a, double __b); __DEVICE__ double __nv_ldexp(double __a, int __b); __DEVICE__ float __nv_ldexpf(float __a, int __b); __DEVICE__ double __nv_lgamma(double __a); __DEVICE__ float __nv_lgammaf(float __a); __DEVICE__ double __nv_ll2double_rd(long long __a); __DEVICE__ double __nv_ll2double_rn(long long __a); __DEVICE__ double __nv_ll2double_ru(long long __a); __DEVICE__ double __nv_ll2double_rz(long long __a); __DEVICE__ float __nv_ll2float_rd(long long __a); __DEVICE__ float __nv_ll2float_rn(long long __a); __DEVICE__ float __nv_ll2float_ru(long long __a); __DEVICE__ float __nv_ll2float_rz(long long __a); __DEVICE__ long long __nv_llabs(long long __a); __DEVICE__ long long __nv_llmax(long long __a, long long __b); __DEVICE__ long long __nv_llmin(long long __a, long long __b); __DEVICE__ long long __nv_llrint(double __a); __DEVICE__ long long __nv_llrintf(float __a); __DEVICE__ long long __nv_llround(double __a); __DEVICE__ long long __nv_llroundf(float __a); __DEVICE__ double __nv_log10(double __a); __DEVICE__ float __nv_log10f(float __a); 
__DEVICE__ double __nv_log1p(double __a); __DEVICE__ float __nv_log1pf(float __a); __DEVICE__ double __nv_log2(double __a); __DEVICE__ float __nv_log2f(float __a); __DEVICE__ double __nv_logb(double __a); __DEVICE__ float __nv_logbf(float __a); __DEVICE__ double __nv_log(double __a); __DEVICE__ float __nv_logf(float __a); __DEVICE__ double __nv_longlong_as_double(long long __a); __DEVICE__ int __nv_max(int __a, int __b); __DEVICE__ int __nv_min(int __a, int __b); __DEVICE__ double __nv_modf(double __a, double *__b); __DEVICE__ float __nv_modff(float __a, float *__b); __DEVICE__ int __nv_mul24(int __a, int __b); __DEVICE__ long long __nv_mul64hi(long long __a, long long __b); __DEVICE__ int __nv_mulhi(int __a, int __b); __DEVICE__ double __nv_nan(const signed char *__a); __DEVICE__ float __nv_nanf(const signed char *__a); __DEVICE__ double __nv_nearbyint(double __a); __DEVICE__ float __nv_nearbyintf(float __a); __DEVICE__ double __nv_nextafter(double __a, double __b); __DEVICE__ float __nv_nextafterf(float __a, float __b); __DEVICE__ double __nv_norm3d(double __a, double __b, double __c); __DEVICE__ float __nv_norm3df(float __a, float __b, float __c); __DEVICE__ double __nv_norm4d(double __a, double __b, double __c, double __d); __DEVICE__ float __nv_norm4df(float __a, float __b, float __c, float __d); __DEVICE__ double __nv_normcdf(double __a); __DEVICE__ float __nv_normcdff(float __a); __DEVICE__ double __nv_normcdfinv(double __a); __DEVICE__ float __nv_normcdfinvf(float __a); __DEVICE__ float __nv_normf(int __a, const float *__b); __DEVICE__ double __nv_norm(int __a, const double *__b); __DEVICE__ int __nv_popc(int __a); __DEVICE__ int __nv_popcll(long long __a); __DEVICE__ double __nv_pow(double __a, double __b); __DEVICE__ float __nv_powf(float __a, float __b); __DEVICE__ double __nv_powi(double __a, int __b); __DEVICE__ float __nv_powif(float __a, int __b); __DEVICE__ double __nv_rcbrt(double __a); __DEVICE__ float __nv_rcbrtf(float __a); __DEVICE__ double 
__nv_rcp64h(double __a); __DEVICE__ double __nv_remainder(double __a, double __b); __DEVICE__ float __nv_remainderf(float __a, float __b); __DEVICE__ double __nv_remquo(double __a, double __b, int *__c); __DEVICE__ float __nv_remquof(float __a, float __b, int *__c); __DEVICE__ int __nv_rhadd(int __a, int __b); __DEVICE__ double __nv_rhypot(double __a, double __b); __DEVICE__ float __nv_rhypotf(float __a, float __b); __DEVICE__ double __nv_rint(double __a); __DEVICE__ float __nv_rintf(float __a); __DEVICE__ double __nv_rnorm3d(double __a, double __b, double __c); __DEVICE__ float __nv_rnorm3df(float __a, float __b, float __c); __DEVICE__ double __nv_rnorm4d(double __a, double __b, double __c, double __d); __DEVICE__ float __nv_rnorm4df(float __a, float __b, float __c, float __d); __DEVICE__ float __nv_rnormf(int __a, const float *__b); __DEVICE__ double __nv_rnorm(int __a, const double *__b); __DEVICE__ double __nv_round(double __a); __DEVICE__ float __nv_roundf(float __a); __DEVICE__ double __nv_rsqrt(double __a); __DEVICE__ float __nv_rsqrtf(float __a); __DEVICE__ int __nv_sad(int __a, int __b, int __c); __DEVICE__ float __nv_saturatef(float __a); __DEVICE__ double __nv_scalbn(double __a, int __b); __DEVICE__ float __nv_scalbnf(float __a, int __b); __DEVICE__ int __nv_signbitd(double __a); __DEVICE__ int __nv_signbitf(float __a); __DEVICE__ void __nv_sincos(double __a, double *__b, double *__c); __DEVICE__ void __nv_sincosf(float __a, float *__b, float *__c); __DEVICE__ void __nv_sincospi(double __a, double *__b, double *__c); __DEVICE__ void __nv_sincospif(float __a, float *__b, float *__c); __DEVICE__ double __nv_sin(double __a); __DEVICE__ float __nv_sinf(float __a); __DEVICE__ double __nv_sinh(double __a); __DEVICE__ float __nv_sinhf(float __a); __DEVICE__ double __nv_sinpi(double __a); __DEVICE__ float __nv_sinpif(float __a); __DEVICE__ double __nv_sqrt(double __a); __DEVICE__ float __nv_sqrtf(float __a); __DEVICE__ double __nv_tan(double __a); __DEVICE__ 
float __nv_tanf(float __a); __DEVICE__ double __nv_tanh(double __a); __DEVICE__ float __nv_tanhf(float __a); __DEVICE__ double __nv_tgamma(double __a); __DEVICE__ float __nv_tgammaf(float __a); __DEVICE__ double __nv_trunc(double __a); __DEVICE__ float __nv_truncf(float __a); __DEVICE__ int __nv_uhadd(unsigned int __a, unsigned int __b); __DEVICE__ double __nv_uint2double_rn(unsigned int __i); __DEVICE__ float __nv_uint2float_rd(unsigned int __a); __DEVICE__ float __nv_uint2float_rn(unsigned int __a); __DEVICE__ float __nv_uint2float_ru(unsigned int __a); __DEVICE__ float __nv_uint2float_rz(unsigned int __a); __DEVICE__ float __nv_uint_as_float(unsigned int __a); __DEVICE__ double __nv_ull2double_rd(unsigned long long __a); __DEVICE__ double __nv_ull2double_rn(unsigned long long __a); __DEVICE__ double __nv_ull2double_ru(unsigned long long __a); __DEVICE__ double __nv_ull2double_rz(unsigned long long __a); __DEVICE__ float __nv_ull2float_rd(unsigned long long __a); __DEVICE__ float __nv_ull2float_rn(unsigned long long __a); __DEVICE__ float __nv_ull2float_ru(unsigned long long __a); __DEVICE__ float __nv_ull2float_rz(unsigned long long __a); __DEVICE__ unsigned long long __nv_ullmax(unsigned long long __a, unsigned long long __b); __DEVICE__ unsigned long long __nv_ullmin(unsigned long long __a, unsigned long long __b); __DEVICE__ unsigned int __nv_umax(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_umin(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b); __DEVICE__ unsigned long long __nv_umul64hi(unsigned long long __a, unsigned long long __b); __DEVICE__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b); __DEVICE__ unsigned int __nv_usad(unsigned int __a, unsigned int __b, unsigned int __c); #if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 __DEVICE__ int __nv_vabs2(int __a); __DEVICE__ int __nv_vabs4(int 
__a); __DEVICE__ int __nv_vabsdiffs2(int __a, int __b); __DEVICE__ int __nv_vabsdiffs4(int __a, int __b); __DEVICE__ int __nv_vabsdiffu2(int __a, int __b); __DEVICE__ int __nv_vabsdiffu4(int __a, int __b); __DEVICE__ int __nv_vabsss2(int __a); __DEVICE__ int __nv_vabsss4(int __a); __DEVICE__ int __nv_vadd2(int __a, int __b); __DEVICE__ int __nv_vadd4(int __a, int __b); __DEVICE__ int __nv_vaddss2(int __a, int __b); __DEVICE__ int __nv_vaddss4(int __a, int __b); __DEVICE__ int __nv_vaddus2(int __a, int __b); __DEVICE__ int __nv_vaddus4(int __a, int __b); __DEVICE__ int __nv_vavgs2(int __a, int __b); __DEVICE__ int __nv_vavgs4(int __a, int __b); __DEVICE__ int __nv_vavgu2(int __a, int __b); __DEVICE__ int __nv_vavgu4(int __a, int __b); __DEVICE__ int __nv_vcmpeq2(int __a, int __b); __DEVICE__ int __nv_vcmpeq4(int __a, int __b); __DEVICE__ int __nv_vcmpges2(int __a, int __b); __DEVICE__ int __nv_vcmpges4(int __a, int __b); __DEVICE__ int __nv_vcmpgeu2(int __a, int __b); __DEVICE__ int __nv_vcmpgeu4(int __a, int __b); __DEVICE__ int __nv_vcmpgts2(int __a, int __b); __DEVICE__ int __nv_vcmpgts4(int __a, int __b); __DEVICE__ int __nv_vcmpgtu2(int __a, int __b); __DEVICE__ int __nv_vcmpgtu4(int __a, int __b); __DEVICE__ int __nv_vcmples2(int __a, int __b); __DEVICE__ int __nv_vcmples4(int __a, int __b); __DEVICE__ int __nv_vcmpleu2(int __a, int __b); __DEVICE__ int __nv_vcmpleu4(int __a, int __b); __DEVICE__ int __nv_vcmplts2(int __a, int __b); __DEVICE__ int __nv_vcmplts4(int __a, int __b); __DEVICE__ int __nv_vcmpltu2(int __a, int __b); __DEVICE__ int __nv_vcmpltu4(int __a, int __b); __DEVICE__ int __nv_vcmpne2(int __a, int __b); __DEVICE__ int __nv_vcmpne4(int __a, int __b); __DEVICE__ int __nv_vhaddu2(int __a, int __b); __DEVICE__ int __nv_vhaddu4(int __a, int __b); __DEVICE__ int __nv_vmaxs2(int __a, int __b); __DEVICE__ int __nv_vmaxs4(int __a, int __b); __DEVICE__ int __nv_vmaxu2(int __a, int __b); __DEVICE__ int __nv_vmaxu4(int __a, int __b); __DEVICE__ int 
__nv_vmins2(int __a, int __b); __DEVICE__ int __nv_vmins4(int __a, int __b); __DEVICE__ int __nv_vminu2(int __a, int __b); __DEVICE__ int __nv_vminu4(int __a, int __b); __DEVICE__ int __nv_vneg2(int __a); __DEVICE__ int __nv_vneg4(int __a); __DEVICE__ int __nv_vnegss2(int __a); __DEVICE__ int __nv_vnegss4(int __a); __DEVICE__ int __nv_vsads2(int __a, int __b); __DEVICE__ int __nv_vsads4(int __a, int __b); __DEVICE__ int __nv_vsadu2(int __a, int __b); __DEVICE__ int __nv_vsadu4(int __a, int __b); __DEVICE__ int __nv_vseteq2(int __a, int __b); __DEVICE__ int __nv_vseteq4(int __a, int __b); __DEVICE__ int __nv_vsetges2(int __a, int __b); __DEVICE__ int __nv_vsetges4(int __a, int __b); __DEVICE__ int __nv_vsetgeu2(int __a, int __b); __DEVICE__ int __nv_vsetgeu4(int __a, int __b); __DEVICE__ int __nv_vsetgts2(int __a, int __b); __DEVICE__ int __nv_vsetgts4(int __a, int __b); __DEVICE__ int __nv_vsetgtu2(int __a, int __b); __DEVICE__ int __nv_vsetgtu4(int __a, int __b); __DEVICE__ int __nv_vsetles2(int __a, int __b); __DEVICE__ int __nv_vsetles4(int __a, int __b); __DEVICE__ int __nv_vsetleu2(int __a, int __b); __DEVICE__ int __nv_vsetleu4(int __a, int __b); __DEVICE__ int __nv_vsetlts2(int __a, int __b); __DEVICE__ int __nv_vsetlts4(int __a, int __b); __DEVICE__ int __nv_vsetltu2(int __a, int __b); __DEVICE__ int __nv_vsetltu4(int __a, int __b); __DEVICE__ int __nv_vsetne2(int __a, int __b); __DEVICE__ int __nv_vsetne4(int __a, int __b); __DEVICE__ int __nv_vsub2(int __a, int __b); __DEVICE__ int __nv_vsub4(int __a, int __b); __DEVICE__ int __nv_vsubss2(int __a, int __b); __DEVICE__ int __nv_vsubss4(int __a, int __b); __DEVICE__ int __nv_vsubus2(int __a, int __b); __DEVICE__ int __nv_vsubus4(int __a, int __b); #endif // CUDA_VERSION __DEVICE__ double __nv_y0(double __a); __DEVICE__ float __nv_y0f(float __a); __DEVICE__ double __nv_y1(double __a); __DEVICE__ float __nv_y1f(float __a); __DEVICE__ float __nv_ynf(int __a, float __b); __DEVICE__ double __nv_yn(int __a, 
double __b); #if defined(__OPENMP_NVPTX__) #pragma omp end assumes ext_spmd_amenable no_openmp #endif #if defined(__cplusplus) } // extern "C" #endif #endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__ /*===---- __clang_cuda_math.h - Device-side CUDA math support --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_CUDA_MATH_H__ #define __CLANG_CUDA_MATH_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif #ifndef __OPENMP_NVPTX__ #if CUDA_VERSION < 9000 #error This file is intended to be used with CUDA-9+ only. #endif #endif // __DEVICE__ is a helper macro with common set of attributes for the wrappers // we implement in this file. We need static in order to avoid emitting unused // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") #ifdef __OPENMP_NVPTX__ #if defined(__cplusplus) #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __attribute__((always_inline, nothrow)) #endif #else #define __DEVICE__ static __device__ __forceinline__ #endif // Specialized version of __DEVICE__ for functions with void return type. Needed // because the OpenMP overlay requires constexpr functions here but prior to // c++14 void return functions could not be constexpr. #pragma push_macro("__DEVICE_VOID__") #if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE_VOID__ __DEVICE__ #endif // libdevice provides fast low precision and slow full-recision implementations // for some functions. 
Which one gets selected depends on // __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if // -ffast-math or -fcuda-approx-transcendentals are in effect. #pragma push_macro("__FAST_OR_SLOW") #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) #define __FAST_OR_SLOW(fast, slow) fast #else #define __FAST_OR_SLOW(fast, slow) slow #endif __DEVICE__ int abs(int __a) { return __nv_abs(__a); } __DEVICE__ double fabs(double __a) { return __nv_fabs(__a); } __DEVICE__ double acos(double __a) { return __nv_acos(__a); } __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); } __DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); } __DEVICE__ double asin(double __a) { return __nv_asin(__a); } __DEVICE__ float asinf(float __a) { return __nv_asinf(__a); } __DEVICE__ double asinh(double __a) { return __nv_asinh(__a); } __DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); } __DEVICE__ double atan(double __a) { return __nv_atan(__a); } __DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); } __DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); } __DEVICE__ float atanf(float __a) { return __nv_atanf(__a); } __DEVICE__ double atanh(double __a) { return __nv_atanh(__a); } __DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); } __DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); } __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); } __DEVICE__ double ceil(double __a) { return __nv_ceil(__a); } __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); } __DEVICE__ double copysign(double __a, double __b) { return __nv_copysign(__a, __b); } __DEVICE__ float copysignf(float __a, float __b) { return __nv_copysignf(__a, __b); } __DEVICE__ double cos(double __a) { return __nv_cos(__a); } __DEVICE__ float cosf(float __a) { return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a); } __DEVICE__ double cosh(double __a) { 
return __nv_cosh(__a); } __DEVICE__ float coshf(float __a) { return __nv_coshf(__a); } __DEVICE__ double cospi(double __a) { return __nv_cospi(__a); } __DEVICE__ float cospif(float __a) { return __nv_cospif(__a); } __DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); } __DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); } __DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); } __DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); } __DEVICE__ double erf(double __a) { return __nv_erf(__a); } __DEVICE__ double erfc(double __a) { return __nv_erfc(__a); } __DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); } __DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); } __DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); } __DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); } __DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); } __DEVICE__ float erff(float __a) { return __nv_erff(__a); } __DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); } __DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); } __DEVICE__ double exp(double __a) { return __nv_exp(__a); } __DEVICE__ double exp10(double __a) { return __nv_exp10(__a); } __DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); } __DEVICE__ double exp2(double __a) { return __nv_exp2(__a); } __DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); } __DEVICE__ float expf(float __a) { return __nv_expf(__a); } __DEVICE__ double expm1(double __a) { return __nv_expm1(__a); } __DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); } __DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); } __DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); } __DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); } __DEVICE__ double fdivide(double __a, double __b) { return __a / __b; } __DEVICE__ float fdividef(float 
__a, float __b) { #if __FAST_MATH__ && !__CUDA_PREC_DIV return __nv_fast_fdividef(__a, __b); #else return __a / __b; #endif } __DEVICE__ double floor(double __f) { return __nv_floor(__f); } __DEVICE__ float floorf(float __f) { return __nv_floorf(__f); } __DEVICE__ double fma(double __a, double __b, double __c) { return __nv_fma(__a, __b, __c); } __DEVICE__ float fmaf(float __a, float __b, float __c) { return __nv_fmaf(__a, __b, __c); } __DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); } __DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); } __DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); } __DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); } __DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); } __DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); } __DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); } __DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); } __DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); } __DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); } __DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); } __DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); } __DEVICE__ double j0(double __a) { return __nv_j0(__a); } __DEVICE__ float j0f(float __a) { return __nv_j0f(__a); } __DEVICE__ double j1(double __a) { return __nv_j1(__a); } __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); } __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); } __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); } #if defined(__LP64__) || defined(_WIN64) __DEVICE__ long labs(long __a) { return __nv_llabs(__a); }; #else __DEVICE__ long labs(long __a) { return __nv_abs(__a); }; #endif __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); } __DEVICE__ float 
ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); } __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); } __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); } __DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); } __DEVICE__ long long llmax(long long __a, long long __b) { return __nv_llmax(__a, __b); } __DEVICE__ long long llmin(long long __a, long long __b) { return __nv_llmin(__a, __b); } __DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); } __DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); } __DEVICE__ long long llround(double __a) { return __nv_llround(__a); } __DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); } __DEVICE__ double round(double __a) { return __nv_round(__a); } __DEVICE__ float roundf(float __a) { return __nv_roundf(__a); } __DEVICE__ double log(double __a) { return __nv_log(__a); } __DEVICE__ double log10(double __a) { return __nv_log10(__a); } __DEVICE__ float log10f(float __a) { return __nv_log10f(__a); } __DEVICE__ double log1p(double __a) { return __nv_log1p(__a); } __DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); } __DEVICE__ double log2(double __a) { return __nv_log2(__a); } __DEVICE__ float log2f(float __a) { return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a); } __DEVICE__ double logb(double __a) { return __nv_logb(__a); } __DEVICE__ float logbf(float __a) { return __nv_logbf(__a); } __DEVICE__ float logf(float __a) { return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a); } #if defined(__LP64__) || defined(_WIN64) __DEVICE__ long lrint(double __a) { return llrint(__a); } __DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); } __DEVICE__ long lround(double __a) { return llround(__a); } __DEVICE__ long lroundf(float __a) { return llroundf(__a); } #else __DEVICE__ long lrint(double __a) { return (long)rint(__a); } __DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); } __DEVICE__ long lround(double __a) { 
return round(__a); } __DEVICE__ long lroundf(float __a) { return roundf(__a); } #endif __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); } __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); } __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); } __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); } __DEVICE__ double nearbyint(double __a) { return __builtin_nearbyint(__a); } __DEVICE__ float nearbyintf(float __a) { return __builtin_nearbyintf(__a); } __DEVICE__ double nextafter(double __a, double __b) { return __nv_nextafter(__a, __b); } __DEVICE__ float nextafterf(float __a, float __b) { return __nv_nextafterf(__a, __b); } __DEVICE__ double norm(int __dim, const double *__t) { return __nv_norm(__dim, __t); } __DEVICE__ double norm3d(double __a, double __b, double __c) { return __nv_norm3d(__a, __b, __c); } __DEVICE__ float norm3df(float __a, float __b, float __c) { return __nv_norm3df(__a, __b, __c); } __DEVICE__ double norm4d(double __a, double __b, double __c, double __d) { return __nv_norm4d(__a, __b, __c, __d); } __DEVICE__ float norm4df(float __a, float __b, float __c, float __d) { return __nv_norm4df(__a, __b, __c, __d); } __DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); } __DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); } __DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); } __DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); } __DEVICE__ float normf(int __dim, const float *__t) { return __nv_normf(__dim, __t); } __DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); } __DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); } __DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); } __DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); } __DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); } __DEVICE__ float rcbrtf(float 
__a) { return __nv_rcbrtf(__a); } __DEVICE__ double remainder(double __a, double __b) { return __nv_remainder(__a, __b); } __DEVICE__ float remainderf(float __a, float __b) { return __nv_remainderf(__a, __b); } __DEVICE__ double remquo(double __a, double __b, int *__c) { return __nv_remquo(__a, __b, __c); } __DEVICE__ float remquof(float __a, float __b, int *__c) { return __nv_remquof(__a, __b, __c); } __DEVICE__ double rhypot(double __a, double __b) { return __nv_rhypot(__a, __b); } __DEVICE__ float rhypotf(float __a, float __b) { return __nv_rhypotf(__a, __b); } // __nv_rint* in libdevice is buggy and produces incorrect results. __DEVICE__ double rint(double __a) { return __builtin_rint(__a); } __DEVICE__ float rintf(float __a) { return __builtin_rintf(__a); } __DEVICE__ double rnorm(int __a, const double *__b) { return __nv_rnorm(__a, __b); } __DEVICE__ double rnorm3d(double __a, double __b, double __c) { return __nv_rnorm3d(__a, __b, __c); } __DEVICE__ float rnorm3df(float __a, float __b, float __c) { return __nv_rnorm3df(__a, __b, __c); } __DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) { return __nv_rnorm4d(__a, __b, __c, __d); } __DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) { return __nv_rnorm4df(__a, __b, __c, __d); } __DEVICE__ float rnormf(int __dim, const float *__t) { return __nv_rnormf(__dim, __t); } __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); } __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } __DEVICE__ double scalbln(double __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VAL : -HUGE_VAL; if (__b < INT_MIN) return __a > 0 ? 0.0 : -0.0; return scalbn(__a, (int)__b); } __DEVICE__ float scalblnf(float __a, long __b) { if (__b > INT_MAX) return __a > 0 ? 
HUGE_VALF : -HUGE_VALF; if (__b < INT_MIN) return __a > 0 ? 0.f : -0.f; return scalbnf(__a, (int)__b); } __DEVICE__ double sin(double __a) { return __nv_sin(__a); } __DEVICE_VOID__ void sincos(double __a, double *__s, double *__c) { return __nv_sincos(__a, __s, __c); } __DEVICE_VOID__ void sincosf(float __a, float *__s, float *__c) { return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c); } __DEVICE_VOID__ void sincospi(double __a, double *__s, double *__c) { return __nv_sincospi(__a, __s, __c); } __DEVICE_VOID__ void sincospif(float __a, float *__s, float *__c) { return __nv_sincospif(__a, __s, __c); } __DEVICE__ float sinf(float __a) { return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a); } __DEVICE__ double sinh(double __a) { return __nv_sinh(__a); } __DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); } __DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); } __DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); } __DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); } __DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); } __DEVICE__ double tan(double __a) { return __nv_tan(__a); } __DEVICE__ float tanf(float __a) { return __nv_tanf(__a); } __DEVICE__ double tanh(double __a) { return __nv_tanh(__a); } __DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); } __DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); } __DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); } __DEVICE__ double trunc(double __a) { return __nv_trunc(__a); } __DEVICE__ float truncf(float __a) { return __nv_truncf(__a); } __DEVICE__ unsigned long long ullmax(unsigned long long __a, unsigned long long __b) { return __nv_ullmax(__a, __b); } __DEVICE__ unsigned long long ullmin(unsigned long long __a, unsigned long long __b) { return __nv_ullmin(__a, __b); } __DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) { return __nv_umax(__a, __b); } __DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) { 
return __nv_umin(__a, __b); } __DEVICE__ double y0(double __a) { return __nv_y0(__a); } __DEVICE__ float y0f(float __a) { return __nv_y0f(__a); } __DEVICE__ double y1(double __a) { return __nv_y1(__a); } __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); } __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } #pragma pop_macro("__DEVICE__") #pragma pop_macro("__DEVICE_VOID__") #pragma pop_macro("__FAST_OR_SLOW") #endif // __CLANG_CUDA_MATH_H__ /builtins/__clang_cuda_math_forward_declares.h/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__ #define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__ #if !defined(__CUDA__) && !__HIP__ #error "This file is for CUDA/HIP compilation only." #endif // This file forward-declares of some math functions we (or the CUDA headers) // will define later. We need to do this, and do it before cmath is included, // because the standard library may have constexpr math functions. In the // absence of a prior __device__ decl, those constexpr functions may become // implicitly host+device. host+device functions can't be overloaded, so that // would preclude the use of our own __device__ overloads for these functions. 
#pragma push_macro("__DEVICE__") #define __DEVICE__ \ static __inline__ __attribute__((always_inline)) __attribute__((device)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); __DEVICE__ double abs(double); __DEVICE__ float abs(float); __DEVICE__ int abs(int); __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); __DEVICE__ float acosh(float); __DEVICE__ double asin(double); __DEVICE__ float asin(float); __DEVICE__ double asinh(double); __DEVICE__ float asinh(float); __DEVICE__ double atan2(double, double); __DEVICE__ float atan2(float, float); __DEVICE__ double atan(double); __DEVICE__ float atan(float); __DEVICE__ double atanh(double); __DEVICE__ float atanh(float); __DEVICE__ double cbrt(double); __DEVICE__ float cbrt(float); __DEVICE__ double ceil(double); __DEVICE__ float ceil(float); __DEVICE__ double copysign(double, double); __DEVICE__ float copysign(float, float); __DEVICE__ double cos(double); __DEVICE__ float cos(float); __DEVICE__ double cosh(double); __DEVICE__ float cosh(float); __DEVICE__ double erfc(double); __DEVICE__ float erfc(float); __DEVICE__ double erf(double); __DEVICE__ float erf(float); __DEVICE__ double exp2(double); __DEVICE__ float exp2(float); __DEVICE__ double exp(double); __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); __DEVICE__ double fabs(double); __DEVICE__ float fabs(float); __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); __DEVICE__ float floor(float); __DEVICE__ double fma(double, double, double); __DEVICE__ float fma(float, float, float); __DEVICE__ double fmax(double, double); __DEVICE__ float fmax(float, float); __DEVICE__ double fmin(double, double); __DEVICE__ float fmin(float, float); __DEVICE__ double fmod(double, double); __DEVICE__ float fmod(float, float); __DEVICE__ int fpclassify(double); __DEVICE__ int fpclassify(float); __DEVICE__ double frexp(double, 
int *); __DEVICE__ float frexp(float, int *); __DEVICE__ double hypot(double, double); __DEVICE__ float hypot(float, float); __DEVICE__ int ilogb(double); __DEVICE__ int ilogb(float); #ifdef _MSC_VER __DEVICE__ bool isfinite(long double); #endif __DEVICE__ bool isfinite(double); __DEVICE__ bool isfinite(float); __DEVICE__ bool isgreater(double, double); __DEVICE__ bool isgreaterequal(double, double); __DEVICE__ bool isgreaterequal(float, float); __DEVICE__ bool isgreater(float, float); #ifdef _MSC_VER __DEVICE__ bool isinf(long double); #endif __DEVICE__ bool isinf(double); __DEVICE__ bool isinf(float); __DEVICE__ bool isless(double, double); __DEVICE__ bool islessequal(double, double); __DEVICE__ bool islessequal(float, float); __DEVICE__ bool isless(float, float); __DEVICE__ bool islessgreater(double, double); __DEVICE__ bool islessgreater(float, float); #ifdef _MSC_VER __DEVICE__ bool isnan(long double); #endif __DEVICE__ bool isnan(double); __DEVICE__ bool isnan(float); __DEVICE__ bool isnormal(double); __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); __DEVICE__ long labs(long); __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); __DEVICE__ long long llabs(long long); __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); __DEVICE__ float log10(float); __DEVICE__ double log1p(double); __DEVICE__ float log1p(float); __DEVICE__ double log2(double); __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); __DEVICE__ long lrint(float); __DEVICE__ long lround(double); __DEVICE__ long lround(float); __DEVICE__ long long llround(float); // No llround(double). 
__DEVICE__ double modf(double, double *); __DEVICE__ float modf(float, float *); __DEVICE__ double nan(const char *); __DEVICE__ float nanf(const char *); __DEVICE__ double nearbyint(double); __DEVICE__ float nearbyint(float); __DEVICE__ double nextafter(double, double); __DEVICE__ float nextafter(float, float); __DEVICE__ double pow(double, double); __DEVICE__ double pow(double, int); __DEVICE__ float pow(float, float); __DEVICE__ float pow(float, int); __DEVICE__ double remainder(double, double); __DEVICE__ float remainder(float, float); __DEVICE__ double remquo(double, double, int *); __DEVICE__ float remquo(float, float, int *); __DEVICE__ double rint(double); __DEVICE__ float rint(float); __DEVICE__ double round(double); __DEVICE__ float round(float); __DEVICE__ double scalbln(double, long); __DEVICE__ float scalbln(float, long); __DEVICE__ double scalbn(double, int); __DEVICE__ float scalbn(float, int); #ifdef _MSC_VER __DEVICE__ bool signbit(long double); #endif __DEVICE__ bool signbit(double); __DEVICE__ bool signbit(float); __DEVICE__ double sin(double); __DEVICE__ float sin(float); __DEVICE__ double sinh(double); __DEVICE__ float sinh(float); __DEVICE__ double sqrt(double); __DEVICE__ float sqrt(float); __DEVICE__ double tan(double); __DEVICE__ float tan(float); __DEVICE__ double tanh(double); __DEVICE__ float tanh(float); __DEVICE__ double tgamma(double); __DEVICE__ float tgamma(float); __DEVICE__ double trunc(double); __DEVICE__ float trunc(float); // Notably missing above is nexttoward, which we don't define on // the device side because libdevice doesn't give us an implementation, and we // don't want to be in the business of writing one ourselves. // We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be // picked up by other functions in the standard library (e.g. functions in // ). Thus the ugliness below. 
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else namespace std { #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif using ::abs; using ::acos; using ::acosh; using ::asin; using ::asinh; using ::atan; using ::atan2; using ::atanh; using ::cbrt; using ::ceil; using ::copysign; using ::cos; using ::cosh; using ::erf; using ::erfc; using ::exp; using ::exp2; using ::expm1; using ::fabs; using ::fdim; using ::floor; using ::fma; using ::fmax; using ::fmin; using ::fmod; using ::fpclassify; using ::frexp; using ::hypot; using ::ilogb; using ::isfinite; using ::isgreater; using ::isgreaterequal; using ::isinf; using ::isless; using ::islessequal; using ::islessgreater; using ::isnan; using ::isnormal; using ::isunordered; using ::labs; using ::ldexp; using ::lgamma; using ::llabs; using ::llrint; using ::log; using ::log10; using ::log1p; using ::log2; using ::logb; using ::lrint; using ::lround; using ::llround; using ::modf; using ::nan; using ::nanf; using ::nearbyint; using ::nextafter; using ::pow; using ::remainder; using ::remquo; using ::rint; using ::round; using ::scalbln; using ::scalbn; using ::signbit; using ::sin; using ::sinh; using ::sqrt; using ::tan; using ::tanh; using ::tgamma; using ::trunc; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION #endif } // namespace std #endif #pragma pop_macro("__DEVICE__") #endif /builtins/__clang_cuda_runtime_wrapper.h/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * WARNING: This header is intended to be directly -include'd by * the compiler and is not supposed to be included by users. * * CUDA headers are implemented in a way that currently makes it * impossible for user code to #include directly when compiling with * Clang. They present different view of CUDA-supplied functions * depending on where in NVCC's compilation pipeline the headers are * included. Neither of these modes provides function definitions with * correct attributes, so we use preprocessor to force the headers * into a form that Clang can use. * * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's * this file during every CUDA compilation. */ #ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ #define __CLANG_CUDA_RUNTIME_WRAPPER_H__ #if defined(__CUDA__) && defined(__clang__) // Include some forward declares that must come before cmath. #include <__clang_cuda_math_forward_declares.h> // Define __CUDACC__ early as libstdc++ standard headers with GNU extensions // enabled depend on it to avoid using __float128, which is unsupported in // CUDA. #define __CUDACC__ // Include some standard headers to avoid CUDA headers including them // while some required macros (like __THROW) are in a weird state. #include #include #include #include #undef __CUDACC__ // Preserve common macros that will be changed below by us or by CUDA // headers. #pragma push_macro("__THROW") #pragma push_macro("__CUDA_ARCH__") // WARNING: Preprocessor hacks below are based on specific details of // CUDA-7.x headers and are not expected to work with any other // version of CUDA headers. #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" #elif CUDA_VERSION < 7000 #error "Unsupported CUDA version!" 
#endif #pragma push_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") #if CUDA_VERSION >= 10000 #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ #endif // Make largest subset of device functions available during host // compilation. #ifndef __CUDA_ARCH__ #define __CUDA_ARCH__ 9999 #endif #include "__clang_cuda_builtin_vars.h" // No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above // has taken care of builtin variables declared in the file. #define __DEVICE_LAUNCH_PARAMETERS_H__ // {math,device}_functions.h only have declarations of the // functions. We don't need them as we're going to pull in their // definitions from .hpp files. #define __DEVICE_FUNCTIONS_H__ #define __MATH_FUNCTIONS_H__ #define __COMMON_FUNCTIONS_H__ // device_functions_decls is replaced by __clang_cuda_device_functions.h // included below. #define __DEVICE_FUNCTIONS_DECLS_H__ #undef __CUDACC__ #if CUDA_VERSION < 9000 #define __CUDABE__ #else #define __CUDACC__ #define __CUDA_LIBDEVICE__ #endif // Disables definitions of device-side runtime support stubs in // cuda_device_runtime_api.h #include "host_defines.h" #undef __CUDACC__ #include "driver_types.h" #include "host_config.h" // Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in // cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the // functional equivalent of what we need. #pragma push_macro("nv_weak") #define nv_weak weak #undef __CUDABE__ #undef __CUDA_LIBDEVICE__ #define __CUDACC__ #include "cuda_runtime.h" #pragma pop_macro("nv_weak") #undef __CUDACC__ #define __CUDABE__ // CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does // not have at the moment. Emulate them with a builtin memcpy/memset. 
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n) #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n) #if CUDA_VERSION < 9000 #include "crt/device_runtime.h" #endif #include "crt/host_runtime.h" // device_runtime.h defines __cxa_* macros that will conflict with // cxxabi.h. // FIXME: redefine these as __device__ functions. #undef __cxa_vec_ctor #undef __cxa_vec_cctor #undef __cxa_vec_dtor #undef __cxa_vec_new #undef __cxa_vec_new2 #undef __cxa_vec_new3 #undef __cxa_vec_delete2 #undef __cxa_vec_delete #undef __cxa_vec_delete3 #undef __cxa_pure_virtual // math_functions.hpp expects this host function be defined on MacOS, but it // ends up not being there because of the games we play here. Just define it // ourselves; it's simple enough. #ifdef __APPLE__ inline __host__ double __signbitd(double x) { return std::signbit(x); } #endif // CUDA 9.1 no longer provides declarations for libdevice functions, so we need // to provide our own. #include <__clang_cuda_libdevice_declares.h> // Wrappers for many device-side standard library functions, incl. math // functions, became compiler builtins in CUDA-9 and have been removed from the // CUDA headers. Clang now provides its own implementation of the wrappers. #if CUDA_VERSION >= 9000 #include <__clang_cuda_device_functions.h> #include <__clang_cuda_math.h> #endif // __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's // counterpart does not do it, so we need to make it empty here to keep // following CUDA includes happy. #undef __THROW #define __THROW // CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values. // Previous versions used to check whether they are defined or not. // CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it // here to detect the switch. 
#if defined(CU_DEVICE_INVALID) #if !defined(__USE_FAST_MATH__) #define __USE_FAST_MATH__ 0 #endif #if !defined(__CUDA_PREC_DIV) #define __CUDA_PREC_DIV 0 #endif #endif // Temporarily poison __host__ macro to ensure it's not used by any of // the headers we're about to include. #pragma push_macro("__host__") #define __host__ UNEXPECTED_HOST_ATTRIBUTE // device_functions.hpp and math_functions*.hpp use 'static // __forceinline__' (with no __device__) for definitions of device // functions. Temporarily redefine __forceinline__ to include // __device__. #pragma push_macro("__forceinline__") #define __forceinline__ __device__ __inline__ __attribute__((always_inline)) #if CUDA_VERSION < 9000 #include "device_functions.hpp" #endif // math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we // get the slow-but-accurate or fast-but-inaccurate versions of functions like // sin and exp. This is controlled in clang by -fcuda-approx-transcendentals. // // device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs. // slow divides), so we need to scope our define carefully here. #pragma push_macro("__USE_FAST_MATH__") #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) #define __USE_FAST_MATH__ 1 #endif #if CUDA_VERSION >= 9000 #include "crt/math_functions.hpp" #else #include "math_functions.hpp" #endif #pragma pop_macro("__USE_FAST_MATH__") #if CUDA_VERSION < 9000 #include "math_functions_dbl_ptx3.hpp" #endif #pragma pop_macro("__forceinline__") // Pull in host-only functions that are only available when neither // __CUDACC__ nor __CUDABE__ are defined. #undef __MATH_FUNCTIONS_HPP__ #undef __CUDABE__ #if CUDA_VERSION < 9000 #include "math_functions.hpp" #endif // Alas, additional overloads for these functions are hard to get to. // Considering that we only need these overloads for a few functions, // we can provide them here. 
static inline float rsqrt(float __a) { return rsqrtf(__a); } static inline float rcbrt(float __a) { return rcbrtf(__a); } static inline float sinpi(float __a) { return sinpif(__a); } static inline float cospi(float __a) { return cospif(__a); } static inline void sincospi(float __a, float *__b, float *__c) { return sincospif(__a, __b, __c); } static inline float erfcinv(float __a) { return erfcinvf(__a); } static inline float normcdfinv(float __a) { return normcdfinvf(__a); } static inline float normcdf(float __a) { return normcdff(__a); } static inline float erfcx(float __a) { return erfcxf(__a); } #if CUDA_VERSION < 9000 // For some reason single-argument variant is not always declared by // CUDA headers. Alas, device_functions.hpp included below needs it. static inline __device__ void __brkpt(int __c) { __brkpt(); } #endif // Now include *.hpp with definitions of various GPU functions. Alas, // a lot of thins get declared/defined with __host__ attribute which // we don't want and we have to define it out. We also have to include // {device,math}_functions.hpp again in order to extract the other // branch of #if/else inside. #define __host__ #undef __CUDABE__ #define __CUDACC__ #if CUDA_VERSION >= 9000 // Some atomic functions became compiler builtins in CUDA-9 , so we need their // declarations. #include "device_atomic_functions.h" #endif #undef __DEVICE_FUNCTIONS_HPP__ #include "device_atomic_functions.hpp" #if CUDA_VERSION >= 9000 #include "crt/device_functions.hpp" #include "crt/device_double_functions.hpp" #else #include "device_functions.hpp" #define __CUDABE__ #include "device_double_functions.h" #undef __CUDABE__ #endif #include "sm_20_atomic_functions.hpp" // Predicate functions used in `__builtin_assume` need to have no side effect. // However, sm_20_intrinsics.hpp doesn't define them with neither pure nor // const attribute. Rename definitions from sm_20_intrinsics.hpp and re-define // them as pure ones. 
#pragma push_macro("__isGlobal") #pragma push_macro("__isShared") #pragma push_macro("__isConstant") #pragma push_macro("__isLocal") #define __isGlobal __ignored_cuda___isGlobal #define __isShared __ignored_cuda___isShared #define __isConstant __ignored_cuda___isConstant #define __isLocal __ignored_cuda___isLocal #include "sm_20_intrinsics.hpp" #pragma pop_macro("__isGlobal") #pragma pop_macro("__isShared") #pragma pop_macro("__isConstant") #pragma pop_macro("__isLocal") #pragma push_macro("__DEVICE__") #define __DEVICE__ static __device__ __forceinline__ __attribute__((const)) __DEVICE__ unsigned int __isGlobal(const void *p) { return __nvvm_isspacep_global(p); } __DEVICE__ unsigned int __isShared(const void *p) { return __nvvm_isspacep_shared(p); } __DEVICE__ unsigned int __isConstant(const void *p) { return __nvvm_isspacep_const(p); } __DEVICE__ unsigned int __isLocal(const void *p) { return __nvvm_isspacep_local(p); } #pragma pop_macro("__DEVICE__") #include "sm_32_atomic_functions.hpp" // Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the // __shfl and __ldg intrinsics using inline (volatile) asm, but we want to // define them using builtins so that the optimizer can reason about and across // these instructions. In particular, using intrinsics for ldg gets us the // [addr+imm] addressing mode, which, although it doesn't actually exist in the // hardware, seems to generate faster machine code because ptxas can more easily // reason about our code. #if CUDA_VERSION >= 8000 #pragma push_macro("__CUDA_ARCH__") #undef __CUDA_ARCH__ #include "sm_60_atomic_functions.hpp" #include "sm_61_intrinsics.hpp" #pragma pop_macro("__CUDA_ARCH__") #endif #undef __MATH_FUNCTIONS_HPP__ // math_functions.hpp defines ::signbit as a __host__ __device__ function. This // conflicts with libstdc++'s constexpr ::signbit, so we have to rename // math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's // conditional on __GNUC__. 
:) #pragma push_macro("signbit") #pragma push_macro("__GNUC__") #undef __GNUC__ #define signbit __ignored_cuda_signbit // CUDA-9 omits device-side definitions of some math functions if it sees // include guard from math.h wrapper from libstdc++. We have to undo the header // guard temporarily to get the definitions we need. #pragma push_macro("_GLIBCXX_MATH_H") #pragma push_macro("_LIBCPP_VERSION") #if CUDA_VERSION >= 9000 #undef _GLIBCXX_MATH_H // We also need to undo another guard that checks for libc++ 3.8+ #ifdef _LIBCPP_VERSION #define _LIBCPP_VERSION 3700 #endif #endif #if CUDA_VERSION >= 9000 #include "crt/math_functions.hpp" #else #include "math_functions.hpp" #endif #pragma pop_macro("_GLIBCXX_MATH_H") #pragma pop_macro("_LIBCPP_VERSION") #pragma pop_macro("__GNUC__") #pragma pop_macro("signbit") #pragma pop_macro("__host__") // __clang_cuda_texture_intrinsics.h must be included first in order to provide // implementation for __nv_tex_surf_handler that CUDA's headers depend on. // The implementation requires c++11 and only works with CUDA-9 or newer. #if __cplusplus >= 201103L && CUDA_VERSION >= 9000 // clang-format off #include <__clang_cuda_texture_intrinsics.h> // clang-format on #else #if CUDA_VERSION >= 9000 // Provide a hint that texture support needs C++11. template struct __nv_tex_needs_cxx11 { const static bool value = false; }; template __host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr, cudaTextureObject_t obj, float x) { _Static_assert(__nv_tex_needs_cxx11::value, "Texture support requires C++11"); } #else // Textures in CUDA-8 and older are not supported by clang.There's no // convenient way to intercept texture use in these versions, so we can't // produce a meaningful error. The source code that attempts to use textures // will continue to fail as it does now. 
#endif // CUDA_VERSION #endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000 #include "texture_fetch_functions.h" #include "texture_indirect_functions.h" // Restore state of __CUDA_ARCH__ and __THROW we had on entry. #pragma pop_macro("__CUDA_ARCH__") #pragma pop_macro("__THROW") // Set up compiler macros expected to be seen during compilation. #undef __CUDABE__ #define __CUDACC__ extern "C" { // Device-side CUDA system calls. // http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls // We need these declarations and wrappers for device-side // malloc/free/printf calls to work without relying on // -fcuda-disable-target-call-checks option. __device__ int vprintf(const char *, const char *); __device__ void free(void *) __attribute((nothrow)); __device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc)); // __assertfail() used to have a `noreturn` attribute. Unfortunately that // contributed to triggering the longstanding bug in ptxas when assert was used // in sufficiently convoluted code. See // https://bugs.llvm.org/show_bug.cgi?id=27738 for the details. __device__ void __assertfail(const char *__message, const char *__file, unsigned __line, const char *__function, size_t __charSize); // In order for standard assert() macro on linux to work we need to // provide device-side __assert_fail() __device__ static inline void __assert_fail(const char *__message, const char *__file, unsigned __line, const char *__function) { __assertfail(__message, __file, __line, __function, sizeof(char)); } // Clang will convert printf into vprintf, but we still need // device-side declaration for it. __device__ int printf(const char *, ...); } // extern "C" // We also need device-side std::malloc and std::free. 
namespace std { __device__ static inline void free(void *__ptr) { ::free(__ptr); } __device__ static inline void *malloc(size_t __size) { return ::malloc(__size); } } // namespace std // Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to // come after we've pulled in the definition of uint3 and dim3. __device__ inline __cuda_builtin_threadIdx_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_threadIdx_t::operator uint3() const { return {x, y, z}; } __device__ inline __cuda_builtin_blockIdx_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_blockIdx_t::operator uint3() const { return {x, y, z}; } __device__ inline __cuda_builtin_blockDim_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_blockDim_t::operator uint3() const { return {x, y, z}; } __device__ inline __cuda_builtin_gridDim_t::operator dim3() const { return dim3(x, y, z); } __device__ inline __cuda_builtin_gridDim_t::operator uint3() const { return {x, y, z}; } #include <__clang_cuda_cmath.h> #include <__clang_cuda_intrinsics.h> #include <__clang_cuda_complex_builtins.h> // curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host // mode, giving them their "proper" types of dim3 and uint3. This is // incompatible with the types we give in __clang_cuda_builtin_vars.h. As as // hack, force-include the header (nvcc doesn't include it by default) but // redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are // only used here for the redeclarations of blockDim and threadIdx.) 
#pragma push_macro("dim3") #pragma push_macro("uint3") #define dim3 __cuda_builtin_blockDim_t #define uint3 __cuda_builtin_threadIdx_t #include "curand_mtgp32_kernel.h" #pragma pop_macro("dim3") #pragma pop_macro("uint3") #pragma pop_macro("__USE_FAST_MATH__") #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") // CUDA runtime uses this undocumented function to access kernel launch // configuration. The declaration is in crt/device_functions.h but that file // includes a lot of other stuff we don't want. Instead, we'll provide our own // declaration for it here. #if CUDA_VERSION >= 9020 extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0); #endif #endif // __CUDA__ #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ /builtins/__clang_cuda_texture_intrinsics.h/*===--- __clang_cuda_texture_intrinsics.h - Device-side texture support ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== * * This header provides in-header implmentations for NVCC's built-in * __nv_tex_surf_handler() which is used by CUDA's texture-related headers. The * built-in is unusual as it's actually a set of function overloads that use the * first string literal argument as one of the overload parameters. */ #ifndef __CLANG_CUDA_TEXTURE_INTRINSICS_H__ #define __CLANG_CUDA_TEXTURE_INTRINSICS_H__ #ifndef __CUDA__ #error "This file is for CUDA compilation only." #endif // __nv_tex_surf_handler() provided by this header as a macro. #define __nv_tex_surf_handler(__op, __ptr, ...) 
\ ::__cuda_tex::__tex_fetch< \ ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash(__op)>>(__ptr, \ __VA_ARGS__) #pragma push_macro("__ASM_OUT") #pragma push_macro("__ASM_OUTP") #pragma push_macro("__Args") #pragma push_macro("__ID") #pragma push_macro("__IDV") #pragma push_macro("__IMPL_2DGATHER") #pragma push_macro("__IMPL_ALIAS") #pragma push_macro("__IMPL_ALIASI") #pragma push_macro("__IMPL_F1") #pragma push_macro("__IMPL_F3") #pragma push_macro("__IMPL_F3N") #pragma push_macro("__IMPL_F3S") #pragma push_macro("__IMPL_S") #pragma push_macro("__IMPL_S3") #pragma push_macro("__IMPL_S3I") #pragma push_macro("__IMPL_S3N") #pragma push_macro("__IMPL_S3NI") #pragma push_macro("__IMPL_S3S") #pragma push_macro("__IMPL_S3SI") #pragma push_macro("__IMPL_SI") #pragma push_macro("__L") #pragma push_macro("__STRIP_PARENS") // Put all functions into anonymous namespace so they have internal linkage. // The device-only function here must be internal in order to avoid ODR // violations in case they are used from the files compiled with // -fgpu-rdc. E.g. a library and an app using it may be built with a different // version of this header file. namespace { // Put the implmentation into its own namespace so we don't pollute the TU. namespace __cuda_tex { // First, we need a perfect hash function and a few constexpr helper functions // for converting a string literal into a numeric value which can be used to // parametrize a template. We can not use string literals for that as that would // require C++20. // // The hash function was generated with 'gperf' and then manually converted into // its constexpr equivalent. // // NOTE: the perfect hashing scheme comes with inherent self-test. If the hash // function has a collision for any of the texture operations, the compilation // will fail due to an attempt to redefine a tag with the same value. If the // header compiles, then the hash function is good enough for the job. constexpr int __tex_len(const char *s) { return (s[0] == 0) ? 
0 : (s[1] == 0) ? 1 : (s[2] == 0) ? 2 : (s[3] == 0) ? 3 : (s[4] == 0) ? 4 : (s[5] == 0) ? 5 : (s[6] == 0) ? 6 : (s[7] == 0) ? 7 : (s[8] == 0) ? 8 : (s[9] == 0) ? 9 : (s[10] == 0) ? 10 : (s[11] == 0) ? 11 : (s[12] == 0) ? 12 : (s[13] == 0) ? 13 : (s[14] == 0) ? 14 : (s[15] == 0) ? 15 : (s[16] == 0) ? 16 : (s[17] == 0) ? 17 : (s[18] == 0) ? 18 : (s[19] == 0) ? 19 : (s[20] == 0) ? 20 : (s[21] == 0) ? 21 : (s[22] == 0) ? 22 : (s[23] == 0) ? 23 : (s[24] == 0) ? 24 : (s[25] == 0) ? 25 : (s[26] == 0) ? 26 : (s[27] == 0) ? 27 : (s[28] == 0) ? 28 : (s[29] == 0) ? 29 : (s[30] == 0) ? 30 : (s[31] == 0) ? 31 : 32; } constexpr int __tex_hash_map(int c) { return (c == 49) ? 10 : (c == 50) ? 0 : (c == 51) ? 100 : (c == 52) ? 30 : (c == 67) ? 10 : (c == 68) ? 0 : (c == 69) ? 25 : (c == 72) ? 70 : (c == 77) ? 0 : (c == 96) ? 44 : (c == 99) ? 10 : (c == 100) ? 5 : (c == 101) ? 60 : (c == 102) ? 40 : (c == 103) ? 70 : (c == 104) ? 25 : (c == 112) ? 0 : (c == 114) ? 45 : (c == 117) ? 5 : (c == 118) ? 85 : (c == 120) ? 20 : 225; } constexpr int __tex_op_hash(const char *str) { return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) + __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]); } // Tag type to identify particular texture operation. template struct __Tag; #define __ID(__op) __Tag<__tex_op_hash(__op)> // Tags for variants of particular operation. E.g. tex2Dgather can translate // into 4 different instructions. #define __IDV(__op, __variant) \ __Tag<10000 + __tex_op_hash(__op) * 100 + __variant> // Helper classes for figuring out key data types for derived types. // E.g. char2 has __base_t = char, __fetch_t = char4 template struct __TypeInfoT; // Type info for the fundamental types. 
template <> struct __TypeInfoT { using __base_t = float; using __fetch_t = float4; }; template <> struct __TypeInfoT { using __base_t = char; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = signed char; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = unsigned char; using __fetch_t = uint4; }; template <> struct __TypeInfoT { using __base_t = short; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = unsigned short; using __fetch_t = uint4; }; template <> struct __TypeInfoT { using __base_t = int; using __fetch_t = int4; }; template <> struct __TypeInfoT { using __base_t = unsigned int; using __fetch_t = uint4; }; // Derived base/fetch types for N-element vectors. template struct __TypeInfoT { using __base_t = decltype(__T::x); using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t; }; // Classes that implement specific texture ops. template struct __tex_fetch_v4; // Helper macros to strip parens from a macro argument. #define __Args(...) __VA_ARGS__ #define __STRIP_PARENS(__X) __X #define __L(__X) __STRIP_PARENS(__Args __X) // Construct inline assembly output args. // Results are stored in a temp var __r. // isResident bool is pointed to by __ir // Asm args for return values. It's a 4-element vector #define __ASM_OUT(__t) \ ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w)) // .. possibly combined with a predicate. #define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir)) // Implements a single variant of texture fetch instruction. #define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args) \ template <> \ __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) { \ __rt __r; \ asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args)); \ return __r; \ } // Implements texture fetch instructions for int4/uint4/float4 data types. 
#define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \ __ASM_OUT("r"), __asm_args) \ __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \ __ASM_OUT("r"), __asm_args) \ __IMPL_F1(float4, float4, __args, \ __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"), \ __asm_args) // Implements 'sparse' texture fetch instructions for int4/uint4/float4 data // types. Similar to above, but returns a boolean 'isPresent' value in addition // to texture data, #define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \ __ASM_OUTP("r"), __asm_args) \ __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \ __ASM_OUTP("r"), __asm_args) \ __IMPL_F1(float4, float4, __args, \ __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"), \ __asm_args) // Similar to F3, but for integer data which is returned as normalized floats. // Only instantiates fetch functions for int4/uint4. #define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \ __ASM_OUT("r"), __asm_args) \ __IMPL_F1(float4, uint4, __args, \ __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"), \ __asm_args) // Instantiates __tex_fetch_v4 with regular fetch functions. #define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ template <> struct __tex_fetch_v4<__op> { \ template \ __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \ __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ } // Same, but for sparse ops. 
Only available on sm_60+ #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600) #define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ template <> struct __tex_fetch_v4<__op> { \ template \ __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \ __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ } #else #define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) #endif // Same, but for normalized float ops. #define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ template <> struct __tex_fetch_v4<__op> { \ template \ __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args)); \ __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \ } // Regular and normalized float ops share a lot of similarities. This macro // instantiates both variants -- normal for __op and normalized for __opn. #define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args); \ __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args) // Convenience macros which converts string literal __op into a __Tag, #define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args) #define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args) #define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \ __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args) #define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) \ __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \ __asm_args) // CUDA headers have some 'legacy' texture oprerations that duplicate // functionality. 
So, we just inherit it, instead of refining a copy. #define __IMPL_ALIASI(__op, __opn) \ template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {} #define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn)) // Now we can instantiate everything we need for each specific texture fetch // variant. __IMPL_S("__tex1D_v2", "__tex1D_rmnf_v2", (float __x), "tex.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}];", ("f"(__x))); __IMPL_S("__tex1Dfetch_v2", "__tex1Dfetch_rmnf_v2", (int __x), "tex.1d.v4", "s32", "{%0, %1, %2, %3}, [%4, {%5}];", ("r"(__x))); __IMPL_ALIAS("__itex1D", "__tex1D_v2"); __IMPL_ALIAS("__itex1Dfetch", "__tex1Dfetch_v2"); __IMPL_S("__tex1DGrad_v2", "__tex1DGrad_rmnf_v2", (float __x, float __dPdx, float __dPdy), "tex.grad.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};", ("f"(__x), "f"(__dPdx), "f"(__dPdy))); __IMPL_ALIAS("__itex1DGrad", "__tex1DGrad_v2"); __IMPL_S("__tex1DLayered_v2", "__tex1DLayered_rmnf_v2", (float __x, int __layer), "tex.a1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("r"(__layer), "f"(__x))); __IMPL_ALIAS("__itex1DLayered", "__tex1DLayered_v2"); __IMPL_S("__tex1DLayeredGrad_v2", "__tex1DLayeredGrad_rmnf_v2", (float __x, int __layer, float __dPdx, float __dPdy), "tex.grad.a1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};", ("r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy))); __IMPL_ALIAS("__itex1DLayeredGrad", "__tex1DLayeredGrad_v2"); __IMPL_S("__tex1DLayeredLod_v2", "__tex1DLayeredLod_rmnf_v2", (float __x, int __layer, float __level), "tex.level.a1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;", ("r"(__layer), "f"(__x), "f"(__level))); __IMPL_ALIAS("__itex1DLayeredLod", "__tex1DLayeredLod_v2"); __IMPL_S("__tex1DLod_v2", "__tex1DLod_rmnf_v2", (float __x, float __level), "tex.level.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], %6;", ("f"(__x), "f"(__level))); __IMPL_ALIAS("__itex1DLod", "__tex1DLod_v2"); // 2D __IMPL_S("__tex2D_v2", "__tex2D_rmnf_v2", (float __x, float __y), 
"tex.2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y))); __IMPL_ALIAS("__itex2D", "__tex2D_v2"); __IMPL_S3S("__itex2D_sparse", (float __x, float __y, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" " selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y))); __IMPL_S("__tex2DGrad_v2", "__tex2DGrad_rmnf_v2", (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy), "tex.grad.2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};", ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_ALIAS("__itex2DGrad_v2", "__tex2DGrad_v2"); __IMPL_S3S("__itex2DGrad_sparse", (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.grad.2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_S("__tex2DLayered_v2", "__tex2DLayered_rmnf_v2", (float __x, float __y, int __layer), "tex.a2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];", ("r"(__layer), "f"(__x), "f"(__y))); __IMPL_ALIAS("__itex2DLayered", "__tex2DLayered_v2"); __IMPL_S3S("__itex2DLayered_sparse", (float __x, float __y, int __layer, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.a2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("r"(__layer), "f"(__x), "f"(__y))); __IMPL_S("__tex2DLayeredGrad_v2", "__tex2DLayeredGrad_rmnf_v2", (float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy), "tex.grad.a2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};", ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_ALIAS("__itex2DLayeredGrad_v2", "__tex2DLayeredGrad_v2"); __IMPL_S3S( "__itex2DLayeredGrad_sparse", (float 
__x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.grad.a2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y))); __IMPL_S("__tex2DLayeredLod_v2", "__tex2DLayeredLod_rmnf_v2", (float __x, float __y, int __layer, float __level), "tex.level.a2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;", ("r"(__layer), "f"(__x), "f"(__y), "f"(__level))); __IMPL_ALIAS("__itex2DLayeredLod", "__tex2DLayeredLod_v2"); __IMPL_S3S("__itex2DLayeredLod_sparse", (float __x, float __y, int __layer, float __level, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.level.a2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("r"(__layer), "f"(__x), "f"(__y), "f"(__level))); __IMPL_S("__tex2DLod_v2", "__tex2DLod_rmnf_v2", (float __x, float __y, float __level), "tex.level.2d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;", ("f"(__x), "f"(__y), "f"(__level))); __IMPL_ALIAS("__itex2DLod", "__tex2DLod_v2"); __IMPL_S3S("__itex2DLod_sparse", (float __x, float __y, float __level, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.level.2d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__level))); // 2D gather is special. Unlike other variants that translate into exactly one // asm instruction, it uses one of the four different instructions selected by // __comp. We implement each instruction variant separately, and dispatch the // right one from the manually implemented 'umbrella' fetch. 
#define __IMPL_2DGATHER(variant, instr) \ __IMPL_SI(__IDV("__tex2Dgather_v2", variant), \ __IDV("__tex2Dgather_rmnf_v2", variant), \ (float __x, float __y, int __comp), instr, "f32", \ "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y))); \ __IMPL_ALIASI(__IDV("__itex2Dgather", variant), \ __IDV("__tex2Dgather_v2", variant)); \ __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant), \ (float __x, float __y, unsigned char *__ir, int __comp), \ "{.reg .pred %%p0;\n\t" instr, "f32", \ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" \ "selp.u16 %4, 1, 0, %%p0; }", \ ("f"(__x), "f"(__y))); __IMPL_2DGATHER(0, "tld4.r.2d.v4"); __IMPL_2DGATHER(1, "tld4.g.2d.v4"); __IMPL_2DGATHER(2, "tld4.b.2d.v4"); __IMPL_2DGATHER(3, "tld4.a.2d.v4"); // Umbrella dispatcher -- calls into specific 2Dgather variant. template <> struct __tex_fetch_v4<__ID("__tex2Dgather_v2")> { template __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y, int __comp) { switch (__comp) { case 0: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 0)>::__run<__T>( __obj, __x, __y, __comp); case 1: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 1)>::__run<__T>( __obj, __x, __y, __comp); case 2: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 2)>::__run<__T>( __obj, __x, __y, __comp); case 3: return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 3)>::__run<__T>( __obj, __x, __y, __comp); } } }; __IMPL_ALIAS("__itex2Dgather", "__tex2Dgather_v2"); template <> struct __tex_fetch_v4<__ID("__tex2Dgather_rmnf_v2")> { template __device__ static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __comp) { switch (__comp) { case 0: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 0)>::__run<__T>( __obj, __x, __y, __comp); case 1: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 1)>::__run<__T>( __obj, __x, __y, __comp); case 2: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 2)>::__run<__T>( __obj, __x, __y, __comp); case 3: return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 
3)>::__run<__T>( __obj, __x, __y, __comp); } } }; #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600) template <> struct __tex_fetch_v4<__ID("__itex2Dgather_sparse")> { template __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y, unsigned char *__ir, int __comp) { switch (__comp) { case 0: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 0)>::__run<__T>( __obj, __x, __y, __ir, __comp); case 1: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 1)>::__run<__T>( __obj, __x, __y, __ir, __comp); case 2: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 2)>::__run<__T>( __obj, __x, __y, __ir, __comp); case 3: return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 3)>::__run<__T>( __obj, __x, __y, __ir, __comp); } } }; #endif // 3D __IMPL_S("__tex3D_v2", "__tex3D_rmnf_v2", (float __x, float __y, float __z), "tex.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_ALIAS("__itex3D", "__tex3D_v2"); __IMPL_S3S("__itex3D_sparse", (float __x, float __y, float __z, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.3d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_S("__tex3DGrad_v2", "__tex3DGrad_rmnf_v2", (float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy), "tex.grad.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};", ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_ALIAS("__itex3DGrad_v2", "__tex3DGrad_v2"); __IMPL_S3S("__itex3DGrad_sparse", (float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.grad.3d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z), 
"f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_S("__tex3DLod_v2", "__tex3DLod_rmnf_v2", (float __x, float __y, float __z, float __level), "tex.level.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;", ("f"(__x), "f"(__y), "f"(__z), "f"(__level))); __IMPL_ALIAS("__itex3DLod", "__tex3DLod_v2"); __IMPL_S3S("__itex3DLod_sparse", (float __x, float __y, float __z, float __level, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.level.3d.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z), "f"(__level))); // Cubemap __IMPL_S("__texCubemap_v2", "__texCubemap_rmnf_v2", (float __x, float __y, float __z), "tex.cube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_ALIAS("__itexCubemap", "__texCubemap_v2"); __IMPL_S3S("__itexCubemap_sparse", (float __x, float __y, float __z, unsigned char *__ir), "{.reg .pred %%p0;\n\t" "tex.cube.v4", "f32", "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t" "selp.u16 %4, 1, 0, %%p0; }", ("f"(__x), "f"(__y), "f"(__z))); __IMPL_S("__texCubemapGrad_v2", "__texCubemapGrad_rmnf_v2", (float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy), "tex.grad.cube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};", ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_ALIAS("__itexCubemapGrad_v2", "__texCubemapGrad_v2"); __IMPL_S("__texCubemapLayered_v2", "__texCubemapLayered_rmnf_v2", (float __x, float __y, float __z, int __layer), "tex.acube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];", ("r"(__layer), "f"(__x), "f"(__y), "f"(__z))); __IMPL_ALIAS("__itexCubemapLayered", "__texCubemapLayered_v2"); __IMPL_S("__texCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_rmnf_v2", (float __x, float __y, float __z, 
int __layer, const float4 *__dPdx, const float4 *__dPdy), "tex.grad.acube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};", ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z))); __IMPL_ALIAS("__itexCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_v2"); __IMPL_S("__texCubemapLayeredLod_v2", "__texCubemapLayeredLod_rmnf_v2", (float __x, float __y, float __z, int __layer, float __level), "tex.level.acube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;", ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level))); __IMPL_ALIAS("__itexCubemapLayeredLod", "__texCubemapLayeredLod_v2"); __IMPL_S("__texCubemapLod_v2", "__texCubemapLod_rmnf_v2", (float __x, float __y, float __z, float __level), "tex.level.cube.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;", ("f"(__x), "f"(__y), "f"(__z), "f"(__level))); __IMPL_ALIAS("__itexCubemapLod", "__texCubemapLod_v2"); // Helper class for extracting slice of data from V4 fetch results. template struct __convert { template ::__base_t)> __device__ static __DestT __run(__SrcT __v); template <> __device__ static __DestT __run<1>(__SrcT __v) { return {__v.x}; } template <> __device__ static __DestT __run<2>(__SrcT __v) { return {__v.x, __v.y}; } template <> __device__ static __DestT __run<3>(__SrcT __v) { return {__v.x, __v.y, __v.z}; } template <> __device__ static __DestT __run<4>(__SrcT __v) { return {__v.x, __v.y, __v.z, __v.w}; } }; // These are the top-level function overloads the __nv_tex_surf_handler expands // to. Each overload deals with one of the several ways __nv_tex_surf_handler // is called by CUDA headers. In the end, each of the overloads does the same // job -- it figures out which `__tex_fetch_v4::run` variant should be used to // fetch texture data and which `__convert::run` is needed to convert it into // appropriate return type. 
// __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...); // Data type and return type are based on ret. template __device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__T>::__fetch_t; *__ptr = __convert<__T, __FetchT>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...)); } #if CUDA_VERSION < 12000 // texture<> objects get magically converted into a texture reference. However, // there's no way to convert them to cudaTextureObject_t on C++ level. So, we // cheat a bit and use inline assembly to do it. It costs us an extra register // and a move, but that is easy for ptxas to optimize away. template __device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) { cudaTextureObject_t __obj; asm("mov.b64 %0, %1; " : "=l"(__obj) : "l"(__handle)); return __obj; } // __nv_tex_surf_handler ("__tex...", &ret, textureReference, args...); // Data type and return type is based on ret. template __device__ static void __tex_fetch(__T *__ptr, __HandleT __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__T>::__fetch_t; *__ptr = __convert<__T, __FetchT>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>( __tex_handle_to_obj(__handle), __args...)); } // __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...); // cudaReadModeNormalizedFloat fetches always return float4. template __device__ static void __tex_fetch(__DataT *, __RetT *__ptr, texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t; *__ptr = __convert<__RetT, float4>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>( __tex_handle_to_obj(__handle), __args...)); } // __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...); // For cudaReadModeElementType fetch return type is based on type_dummy. 
template __device__ static void __tex_fetch(__DataT *, __RetT *__ptr, texture<__DataT, __TexT, cudaReadModeElementType> __handle, __Args... __args) { using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t; *__ptr = __convert<__RetT, __FetchT>::__run( __tex_fetch_v4<__op>::template __run<__FetchT>( __tex_handle_to_obj(__handle), __args...)); } #endif // CUDA_VERSION } // namespace __cuda_tex } // namespace #pragma pop_macro("__ASM_OUT") #pragma pop_macro("__ASM_OUTP") #pragma pop_macro("__Args") #pragma pop_macro("__ID") #pragma pop_macro("__IDV") #pragma pop_macro("__IMPL_2DGATHER") #pragma pop_macro("__IMPL_ALIAS") #pragma pop_macro("__IMPL_ALIASI") #pragma pop_macro("__IMPL_F1") #pragma pop_macro("__IMPL_F3") #pragma pop_macro("__IMPL_F3N") #pragma pop_macro("__IMPL_F3S") #pragma pop_macro("__IMPL_S") #pragma pop_macro("__IMPL_S3") #pragma pop_macro("__IMPL_S3I") #pragma pop_macro("__IMPL_S3N") #pragma pop_macro("__IMPL_S3NI") #pragma pop_macro("__IMPL_S3S") #pragma pop_macro("__IMPL_S3SI") #pragma pop_macro("__IMPL_SI") #pragma pop_macro("__L") #pragma pop_macro("__STRIP_PARENS") #endif // __CLANG_CUDA_TEXTURE_INTRINSICS_H__ /*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_CMATH_H__ #define __CLANG_HIP_CMATH_H__ #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." 
#endif #if !defined(__HIPCC_RTC__) #if defined(__cplusplus) #include #include #include #endif #include #include #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") #pragma push_macro("__CONSTEXPR__") #ifdef __OPENMP_AMDGCN__ #define __DEVICE__ static __attribute__((always_inline, nothrow)) #define __CONSTEXPR__ constexpr #else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #define __CONSTEXPR__ #endif // __OPENMP_AMDGCN__ // Start with functions that cannot be defined by DEF macros below. #if defined(__cplusplus) #if defined __OPENMP_AMDGCN__ __DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); } __DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); } #endif __DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); } __DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); } __DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) { return ::fmaf(__x, __y, __z); } #if !defined(__HIPCC_RTC__) // The value returned by fpclassify is platform dependent, therefore it is not // supported by hipRTC. __DEVICE__ __CONSTEXPR__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } __DEVICE__ __CONSTEXPR__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } #endif // !defined(__HIPCC_RTC__) __DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } #if defined(__OPENMP_AMDGCN__) // For OpenMP we work around some old system headers that have non-conforming // `isinf(float)` and `isnan(float)` implementations that return an `int`. 
We do // this by providing two versions of these functions, differing only in the // return type. To avoid conflicting definitions we disable implicit base // function generation. That means we will end up with two specializations, one // per type, but only one has a base function defined by the system header. #pragma omp begin declare variant match( \ implementation = {extension(disable_implicit_base)}) // FIXME: We lack an extension to customize the mangling of the variants, e.g., // add a suffix. This means we would clash with the names of the variants // (note that we do not create implicit base functions here). To avoid // this clash we add a new trait to some of them that is always true // (this is LLVM after all ;)). It will only influence the mangled name // of the variants inside the inner region and avoid the clash. #pragma omp begin declare variant match(implementation = {vendor(llvm)}) __DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); } __DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); } __DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); } __DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); } #pragma omp end declare variant #endif // defined(__OPENMP_AMDGCN__) __DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); } __DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); } #if defined(__OPENMP_AMDGCN__) #pragma omp end declare variant #endif // defined(__OPENMP_AMDGCN__) 
__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) { return __builtin_isgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) { return __builtin_isless(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) { return __builtin_islessequal(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ __CONSTEXPR__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) { return __builtin_isunordered(__x, __y); } __DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } __DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) { return ::powif(__base, __iexp); } __DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) { return ::powi(__base, __iexp); } __DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) { return ::remquof(__x, __y, __quo); } __DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) { return ::scalblnf(__x, __n); } __DEVICE__ __CONSTEXPR__ 
bool signbit(float __x) { return ::__signbitf(__x); } __DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); } // Notably missing above is nexttoward. We omit it because // ocml doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. // Other functions. __DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { return __builtin_fmaf16(__x, __y, __z); } __DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) { return __ocml_pown_f16(__base, __iexp); } #ifndef __OPENMP_AMDGCN__ // BEGIN DEF_FUN and HIP_OVERLOAD // BEGIN DEF_FUN #pragma push_macro("__DEF_FUN1") #pragma push_macro("__DEF_FUN2") #pragma push_macro("__DEF_FUN2_FI") // Define cmath functions with float argument and returns __retty. #define __DEF_FUN1(__retty, __func) \ __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); } // Define cmath functions with two float arguments and returns __retty. #define __DEF_FUN2(__retty, __func) \ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \ return __func##f(__x, __y); \ } // Define cmath functions with a float and an int argument and returns __retty. 
#define __DEF_FUN2_FI(__retty, __func) \ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \ return __func##f(__x, __y); \ } __DEF_FUN1(float, acos) __DEF_FUN1(float, acosh) __DEF_FUN1(float, asin) __DEF_FUN1(float, asinh) __DEF_FUN1(float, atan) __DEF_FUN2(float, atan2) __DEF_FUN1(float, atanh) __DEF_FUN1(float, cbrt) __DEF_FUN1(float, ceil) __DEF_FUN2(float, copysign) __DEF_FUN1(float, cos) __DEF_FUN1(float, cosh) __DEF_FUN1(float, erf) __DEF_FUN1(float, erfc) __DEF_FUN1(float, exp) __DEF_FUN1(float, exp2) __DEF_FUN1(float, expm1) __DEF_FUN1(float, fabs) __DEF_FUN2(float, fdim) __DEF_FUN1(float, floor) __DEF_FUN2(float, fmax) __DEF_FUN2(float, fmin) __DEF_FUN2(float, fmod) __DEF_FUN2(float, hypot) __DEF_FUN1(int, ilogb) __DEF_FUN2_FI(float, ldexp) __DEF_FUN1(float, lgamma) __DEF_FUN1(float, log) __DEF_FUN1(float, log10) __DEF_FUN1(float, log1p) __DEF_FUN1(float, log2) __DEF_FUN1(float, logb) __DEF_FUN1(long long, llrint) __DEF_FUN1(long long, llround) __DEF_FUN1(long, lrint) __DEF_FUN1(long, lround) __DEF_FUN1(float, nearbyint) __DEF_FUN2(float, nextafter) __DEF_FUN2(float, pow) __DEF_FUN2(float, remainder) __DEF_FUN1(float, rint) __DEF_FUN1(float, round) __DEF_FUN2_FI(float, scalbn) __DEF_FUN1(float, sin) __DEF_FUN1(float, sinh) __DEF_FUN1(float, sqrt) __DEF_FUN1(float, tan) __DEF_FUN1(float, tanh) __DEF_FUN1(float, tgamma) __DEF_FUN1(float, trunc) #pragma pop_macro("__DEF_FUN1") #pragma pop_macro("__DEF_FUN2") #pragma pop_macro("__DEF_FUN2_FI") // END DEF_FUN // BEGIN HIP_OVERLOAD #pragma push_macro("__HIP_OVERLOAD1") #pragma push_macro("__HIP_OVERLOAD2") // __hip_enable_if::type is a type function which returns __T if __B is true. 
template struct __hip_enable_if {}; template struct __hip_enable_if { typedef __T type; }; namespace __hip { template struct is_integral { enum { value = 0 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; template <> struct is_integral { enum { value = 1 }; }; // ToDo: specializes is_arithmetic<_Float16> template struct is_arithmetic { enum { value = 0 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; template <> struct is_arithmetic { enum { value = 1 }; }; struct true_type { static const __constant__ bool value = true; }; struct false_type { static const __constant__ bool value = false; }; 
template struct is_same : public false_type {}; template struct is_same<__T, __T> : public true_type {}; template struct add_rvalue_reference { typedef __T &&type; }; template typename add_rvalue_reference<__T>::type declval(); // decltype is only available in C++11 and above. #if __cplusplus >= 201103L // __hip_promote template struct __numeric_type { static void __test(...); static _Float16 __test(_Float16); static float __test(float); static double __test(char); static double __test(int); static double __test(unsigned); static double __test(long); static double __test(unsigned long); static double __test(long long); static double __test(unsigned long long); static double __test(double); // No support for long double, use double instead. static double __test(long double); typedef decltype(__test(declval<_Tp>())) type; static const bool value = !is_same::value; }; template <> struct __numeric_type { static const bool value = true; }; template ::value &&__numeric_type<_A2>::value &&__numeric_type<_A3>::value> class __promote_imp { public: static const bool value = false; }; template class __promote_imp<_A1, _A2, _A3, true> { private: typedef typename __promote_imp<_A1>::type __type1; typedef typename __promote_imp<_A2>::type __type2; typedef typename __promote_imp<_A3>::type __type3; public: typedef decltype(__type1() + __type2() + __type3()) type; static const bool value = true; }; template class __promote_imp<_A1, _A2, void, true> { private: typedef typename __promote_imp<_A1>::type __type1; typedef typename __promote_imp<_A2>::type __type2; public: typedef decltype(__type1() + __type2()) type; static const bool value = true; }; template class __promote_imp<_A1, void, void, true> { public: typedef typename __numeric_type<_A1>::type type; static const bool value = true; }; template class __promote : public __promote_imp<_A1, _A2, _A3> {}; #endif //__cplusplus >= 201103L } // namespace __hip // __HIP_OVERLOAD1 is used to resolve function calls with integer argument 
to // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with // floor(double). #define __HIP_OVERLOAD1(__retty, __fn) \ template \ __DEVICE__ __CONSTEXPR__ \ typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \ __fn(__T __x) { \ return ::__fn((double)__x); \ } // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double // or integer argument to avoid compilation error due to ambibuity. e.g. // max(5.0f, 6.0) is resolved with max(double, double). #if __cplusplus >= 201103L #define __HIP_OVERLOAD2(__retty, __fn) \ template \ __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \ __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \ typename __hip::__promote<__T1, __T2>::type>::type \ __fn(__T1 __x, __T2 __y) { \ typedef typename __hip::__promote<__T1, __T2>::type __result_type; \ return __fn((__result_type)__x, (__result_type)__y); \ } #else #define __HIP_OVERLOAD2(__retty, __fn) \ template \ __DEVICE__ __CONSTEXPR__ \ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \ __hip::is_arithmetic<__T2>::value, \ __retty>::type \ __fn(__T1 __x, __T2 __y) { \ return __fn((double)__x, (double)__y); \ } #endif __HIP_OVERLOAD1(double, acos) __HIP_OVERLOAD1(double, acosh) __HIP_OVERLOAD1(double, asin) __HIP_OVERLOAD1(double, asinh) __HIP_OVERLOAD1(double, atan) __HIP_OVERLOAD2(double, atan2) __HIP_OVERLOAD1(double, atanh) __HIP_OVERLOAD1(double, cbrt) __HIP_OVERLOAD1(double, ceil) __HIP_OVERLOAD2(double, copysign) __HIP_OVERLOAD1(double, cos) __HIP_OVERLOAD1(double, cosh) __HIP_OVERLOAD1(double, erf) __HIP_OVERLOAD1(double, erfc) __HIP_OVERLOAD1(double, exp) __HIP_OVERLOAD1(double, exp2) __HIP_OVERLOAD1(double, expm1) __HIP_OVERLOAD1(double, fabs) __HIP_OVERLOAD2(double, fdim) __HIP_OVERLOAD1(double, floor) __HIP_OVERLOAD2(double, fmax) __HIP_OVERLOAD2(double, fmin) __HIP_OVERLOAD2(double, fmod) #if !defined(__HIPCC_RTC__) __HIP_OVERLOAD1(int, fpclassify) #endif // !defined(__HIPCC_RTC__) 
__HIP_OVERLOAD2(double, hypot) __HIP_OVERLOAD1(int, ilogb) __HIP_OVERLOAD1(bool, isfinite) __HIP_OVERLOAD2(bool, isgreater) __HIP_OVERLOAD2(bool, isgreaterequal) __HIP_OVERLOAD1(bool, isinf) __HIP_OVERLOAD2(bool, isless) __HIP_OVERLOAD2(bool, islessequal) __HIP_OVERLOAD2(bool, islessgreater) __HIP_OVERLOAD1(bool, isnan) __HIP_OVERLOAD1(bool, isnormal) __HIP_OVERLOAD2(bool, isunordered) __HIP_OVERLOAD1(double, lgamma) __HIP_OVERLOAD1(double, log) __HIP_OVERLOAD1(double, log10) __HIP_OVERLOAD1(double, log1p) __HIP_OVERLOAD1(double, log2) __HIP_OVERLOAD1(double, logb) __HIP_OVERLOAD1(long long, llrint) __HIP_OVERLOAD1(long long, llround) __HIP_OVERLOAD1(long, lrint) __HIP_OVERLOAD1(long, lround) __HIP_OVERLOAD1(double, nearbyint) __HIP_OVERLOAD2(double, nextafter) __HIP_OVERLOAD2(double, pow) __HIP_OVERLOAD2(double, remainder) __HIP_OVERLOAD1(double, rint) __HIP_OVERLOAD1(double, round) __HIP_OVERLOAD1(bool, signbit) __HIP_OVERLOAD1(double, sin) __HIP_OVERLOAD1(double, sinh) __HIP_OVERLOAD1(double, sqrt) __HIP_OVERLOAD1(double, tan) __HIP_OVERLOAD1(double, tanh) __HIP_OVERLOAD1(double, tgamma) __HIP_OVERLOAD1(double, trunc) // Overload these but don't add them to std, they are not part of cmath. __HIP_OVERLOAD2(double, max) __HIP_OVERLOAD2(double, min) // Additional Overloads that don't quite match HIP_OVERLOAD. 
#if __cplusplus >= 201103L template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value && __hip::is_arithmetic<__T3>::value, typename __hip::__promote<__T1, __T2, __T3>::type>::type fma(__T1 __x, __T2 __y, __T3 __z) { typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type; return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z); } #else template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value && __hip::is_arithmetic<__T3>::value, double>::type fma(__T1 __x, __T2 __y, __T3 __z) { return ::fma((double)__x, (double)__y, (double)__z); } #endif template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type frexp(__T __x, int *__exp) { return ::frexp((double)__x, __exp); } template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type ldexp(__T __x, int __exp) { return ::ldexp((double)__x, __exp); } template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type modf(__T __x, double *__exp) { return ::modf((double)__x, __exp); } #if __cplusplus >= 201103L template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type remquo(__T1 __x, __T2 __y, int *__quo) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return ::remquo((__result_type)__x, (__result_type)__y, __quo); } #else template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, double>::type remquo(__T1 __x, __T2 __y, int *__quo) { return ::remquo((double)__x, (double)__y, __quo); } #endif template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type scalbln(__T __x, 
long int __exp) { return ::scalbln((double)__x, __exp); } template __DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type scalbn(__T __x, int __exp) { return ::scalbn((double)__x, __exp); } #pragma pop_macro("__HIP_OVERLOAD1") #pragma pop_macro("__HIP_OVERLOAD2") // END HIP_OVERLOAD // END DEF_FUN and HIP_OVERLOAD #endif // ifndef __OPENMP_AMDGCN__ #endif // defined(__cplusplus) #ifndef __OPENMP_AMDGCN__ // Define these overloads inside the namespace our standard library uses. #if !defined(__HIPCC_RTC__) #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else namespace std { #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif // _LIBCPP_BEGIN_NAMESPACE_STD // Pull the new overloads we defined above into namespace std. // using ::abs; - This may be considered for C++. using ::acos; using ::acosh; using ::asin; using ::asinh; using ::atan; using ::atan2; using ::atanh; using ::cbrt; using ::ceil; using ::copysign; using ::cos; using ::cosh; using ::erf; using ::erfc; using ::exp; using ::exp2; using ::expm1; using ::fabs; using ::fdim; using ::floor; using ::fma; using ::fmax; using ::fmin; using ::fmod; using ::fpclassify; using ::frexp; using ::hypot; using ::ilogb; using ::isfinite; using ::isgreater; using ::isgreaterequal; using ::isless; using ::islessequal; using ::islessgreater; using ::isnormal; using ::isunordered; using ::ldexp; using ::lgamma; using ::llrint; using ::llround; using ::log; using ::log10; using ::log1p; using ::log2; using ::logb; using ::lrint; using ::lround; using ::modf; // using ::nan; - This may be considered for C++. // using ::nanf; - This may be considered for C++. // using ::nanl; - This is not yet defined. using ::nearbyint; using ::nextafter; // using ::nexttoward; - Omit this since we do not have a definition. 
using ::pow; using ::remainder; using ::remquo; using ::rint; using ::round; using ::scalbln; using ::scalbn; using ::signbit; using ::sin; using ::sinh; using ::sqrt; using ::tan; using ::tanh; using ::tgamma; using ::trunc; // Well this is fun: We need to pull these symbols in for libc++, but we can't // pull them in with libstdc++, because its ::isinf and ::isnan are different // than its std::isinf and std::isnan. #ifndef __GLIBCXX__ using ::isinf; using ::isnan; #endif // Finally, pull the "foobarf" functions that HIP defines into std. using ::acosf; using ::acoshf; using ::asinf; using ::asinhf; using ::atan2f; using ::atanf; using ::atanhf; using ::cbrtf; using ::ceilf; using ::copysignf; using ::cosf; using ::coshf; using ::erfcf; using ::erff; using ::exp2f; using ::expf; using ::expm1f; using ::fabsf; using ::fdimf; using ::floorf; using ::fmaf; using ::fmaxf; using ::fminf; using ::fmodf; using ::frexpf; using ::hypotf; using ::ilogbf; using ::ldexpf; using ::lgammaf; using ::llrintf; using ::llroundf; using ::log10f; using ::log1pf; using ::log2f; using ::logbf; using ::logf; using ::lrintf; using ::lroundf; using ::modff; using ::nearbyintf; using ::nextafterf; // using ::nexttowardf; - Omit this since we do not have a definition. using ::powf; using ::remainderf; using ::remquof; using ::rintf; using ::roundf; using ::scalblnf; using ::scalbnf; using ::sinf; using ::sinhf; using ::sqrtf; using ::tanf; using ::tanhf; using ::tgammaf; using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD #else #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION #endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION } // namespace std #endif // _LIBCPP_END_NAMESPACE_STD #endif // !defined(__HIPCC_RTC__) // Define device-side math functions from on MSVC. #if !defined(__HIPCC_RTC__) #if defined(_MSC_VER) // Before VS2019, `` is also included in `` and other headers. // But, from VS2019, it's only included in ``. 
Need to include // `` here to ensure C functions declared there won't be markded as // `__host__` and `__device__` through `` wrapper. #include #if defined(__cplusplus) extern "C" { #endif // defined(__cplusplus) __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x, double y) { return cosh(x) * y; } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x, float y) { return coshf(x) * y; } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) { return fpclassify(*p); } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) { return fpclassify(*p); } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x, double y) { return sinh(x) * y; } __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x, float y) { return sinhf(x) * y; } #if defined(__cplusplus) } #endif // defined(__cplusplus) #endif // defined(_MSC_VER) #endif // !defined(__HIPCC_RTC__) #endif // ifndef __OPENMP_AMDGCN__ #pragma pop_macro("__DEVICE__") #pragma pop_macro("__CONSTEXPR__") #endif // __CLANG_HIP_CMATH_H__ /builtins/__clang_hip_libdevice_declares.h/*===---- __clang_hip_libdevice_declares.h - HIP device library decls -------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_LIBDEVICE_DECLARES_H__ #define __CLANG_HIP_LIBDEVICE_DECLARES_H__ #if !defined(__HIPCC_RTC__) && __has_include("hip/hip_version.h") #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") #ifdef __cplusplus extern "C" { #endif // BEGIN FLOAT __device__ __attribute__((const)) float __ocml_acos_f32(float); __device__ __attribute__((pure)) float __ocml_acosh_f32(float); __device__ __attribute__((const)) float __ocml_asin_f32(float); __device__ __attribute__((pure)) float __ocml_asinh_f32(float); __device__ __attribute__((const)) float __ocml_atan2_f32(float, float); __device__ __attribute__((const)) float __ocml_atan_f32(float); __device__ __attribute__((pure)) float __ocml_atanh_f32(float); __device__ __attribute__((pure)) float __ocml_cbrt_f32(float); __device__ __attribute__((const)) float __ocml_ceil_f32(float); __device__ __attribute__((const)) __device__ float __ocml_copysign_f32(float, float); __device__ float __ocml_cos_f32(float); __device__ float __ocml_native_cos_f32(float); __device__ __attribute__((pure)) __device__ float __ocml_cosh_f32(float); __device__ float __ocml_cospi_f32(float); __device__ float __ocml_i0_f32(float); __device__ float __ocml_i1_f32(float); __device__ __attribute__((pure)) float __ocml_erfc_f32(float); __device__ __attribute__((pure)) float __ocml_erfcinv_f32(float); __device__ __attribute__((pure)) float __ocml_erfcx_f32(float); __device__ __attribute__((pure)) float __ocml_erf_f32(float); __device__ __attribute__((pure)) float __ocml_erfinv_f32(float); __device__ __attribute__((pure)) float __ocml_exp10_f32(float); __device__ __attribute__((pure)) float __ocml_native_exp10_f32(float); __device__ __attribute__((pure)) float __ocml_exp2_f32(float); __device__ __attribute__((pure)) float __ocml_exp_f32(float); __device__ __attribute__((pure)) float 
__ocml_native_exp_f32(float); __device__ __attribute__((pure)) float __ocml_expm1_f32(float); __device__ __attribute__((const)) float __ocml_fabs_f32(float); __device__ __attribute__((const)) float __ocml_fdim_f32(float, float); __device__ __attribute__((const)) float __ocml_floor_f32(float); __device__ __attribute__((const)) float __ocml_fma_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); __device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float, float); __device__ float __ocml_frexp_f32(float, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) float __ocml_hypot_f32(float, float); __device__ __attribute__((const)) int __ocml_ilogb_f32(float); __device__ __attribute__((const)) int __ocml_isfinite_f32(float); __device__ __attribute__((const)) int __ocml_isinf_f32(float); __device__ __attribute__((const)) int __ocml_isnan_f32(float); __device__ float __ocml_j0_f32(float); __device__ float __ocml_j1_f32(float); __device__ __attribute__((const)) float __ocml_ldexp_f32(float, int); __device__ float __ocml_lgamma_f32(float); __device__ __attribute__((pure)) float __ocml_log10_f32(float); __device__ __attribute__((pure)) float __ocml_native_log10_f32(float); __device__ __attribute__((pure)) float __ocml_log1p_f32(float); __device__ __attribute__((pure)) float __ocml_log2_f32(float); __device__ __attribute__((pure)) float __ocml_native_log2_f32(float); __device__ __attribute__((const)) float __ocml_logb_f32(float); __device__ __attribute__((pure)) float __ocml_log_f32(float); __device__ __attribute__((pure)) float __ocml_native_log_f32(float); __device__ float __ocml_modf_f32(float, __attribute__((address_space(5))) float *); __device__ __attribute__((const)) float __ocml_nearbyint_f32(float); __device__ __attribute__((const)) float __ocml_nextafter_f32(float, float); __device__ __attribute__((const)) float 
__ocml_len3_f32(float, float, float); __device__ __attribute__((const)) float __ocml_len4_f32(float, float, float, float); __device__ __attribute__((pure)) float __ocml_ncdf_f32(float); __device__ __attribute__((pure)) float __ocml_ncdfinv_f32(float); __device__ __attribute__((pure)) float __ocml_pow_f32(float, float); __device__ __attribute__((pure)) float __ocml_pown_f32(float, int); __device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); __device__ __attribute__((const)) float __ocml_remainder_f32(float, float); __device__ float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) float __ocml_rhypot_f32(float, float); __device__ __attribute__((const)) float __ocml_rint_f32(float); __device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float); __device__ __attribute__((const)) float __ocml_rlen4_f32(float, float, float, float); __device__ __attribute__((const)) float __ocml_round_f32(float); __device__ __attribute__((pure)) float __ocml_rsqrt_f32(float); __device__ __attribute__((const)) float __ocml_scalb_f32(float, float); __device__ __attribute__((const)) float __ocml_scalbn_f32(float, int); __device__ __attribute__((const)) int __ocml_signbit_f32(float); __device__ float __ocml_sincos_f32(float, __attribute__((address_space(5))) float *); __device__ float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float *); __device__ float __ocml_sin_f32(float); __device__ float __ocml_native_sin_f32(float); __device__ __attribute__((pure)) float __ocml_sinh_f32(float); __device__ float __ocml_sinpi_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_f32(float); __device__ __attribute__((const)) float __ocml_native_sqrt_f32(float); __device__ float __ocml_tan_f32(float); __device__ __attribute__((pure)) float __ocml_tanh_f32(float); __device__ float __ocml_tgamma_f32(float); __device__ __attribute__((const)) float __ocml_trunc_f32(float); __device__ float 
__ocml_y0_f32(float); __device__ float __ocml_y1_f32(float); // BEGIN INTRINSICS __device__ __attribute__((const)) float __ocml_add_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_add_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_add_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_add_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_sub_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_mul_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtz_f32(float, float); __device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float); __device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float); __device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float); // END INTRINSICS // END FLOAT // BEGIN DOUBLE __device__ __attribute__((const)) double __ocml_acos_f64(double); __device__ 
__attribute__((pure)) double __ocml_acosh_f64(double); __device__ __attribute__((const)) double __ocml_asin_f64(double); __device__ __attribute__((pure)) double __ocml_asinh_f64(double); __device__ __attribute__((const)) double __ocml_atan2_f64(double, double); __device__ __attribute__((const)) double __ocml_atan_f64(double); __device__ __attribute__((pure)) double __ocml_atanh_f64(double); __device__ __attribute__((pure)) double __ocml_cbrt_f64(double); __device__ __attribute__((const)) double __ocml_ceil_f64(double); __device__ __attribute__((const)) double __ocml_copysign_f64(double, double); __device__ double __ocml_cos_f64(double); __device__ __attribute__((pure)) double __ocml_cosh_f64(double); __device__ double __ocml_cospi_f64(double); __device__ double __ocml_i0_f64(double); __device__ double __ocml_i1_f64(double); __device__ __attribute__((pure)) double __ocml_erfc_f64(double); __device__ __attribute__((pure)) double __ocml_erfcinv_f64(double); __device__ __attribute__((pure)) double __ocml_erfcx_f64(double); __device__ __attribute__((pure)) double __ocml_erf_f64(double); __device__ __attribute__((pure)) double __ocml_erfinv_f64(double); __device__ __attribute__((pure)) double __ocml_exp10_f64(double); __device__ __attribute__((pure)) double __ocml_exp2_f64(double); __device__ __attribute__((pure)) double __ocml_exp_f64(double); __device__ __attribute__((pure)) double __ocml_expm1_f64(double); __device__ __attribute__((const)) double __ocml_fabs_f64(double); __device__ __attribute__((const)) double __ocml_fdim_f64(double, double); __device__ __attribute__((const)) double __ocml_floor_f64(double); __device__ __attribute__((const)) double __ocml_fma_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fmax_f64(double, double); __device__ __attribute__((const)) double __ocml_fmin_f64(double, double); __device__ __attribute__((const)) double __ocml_fmod_f64(double, double); __device__ double __ocml_frexp_f64(double, 
__attribute__((address_space(5))) int *); __device__ __attribute__((const)) double __ocml_hypot_f64(double, double); __device__ __attribute__((const)) int __ocml_ilogb_f64(double); __device__ __attribute__((const)) int __ocml_isfinite_f64(double); __device__ __attribute__((const)) int __ocml_isinf_f64(double); __device__ __attribute__((const)) int __ocml_isnan_f64(double); __device__ double __ocml_j0_f64(double); __device__ double __ocml_j1_f64(double); __device__ __attribute__((const)) double __ocml_ldexp_f64(double, int); __device__ double __ocml_lgamma_f64(double); __device__ __attribute__((pure)) double __ocml_log10_f64(double); __device__ __attribute__((pure)) double __ocml_log1p_f64(double); __device__ __attribute__((pure)) double __ocml_log2_f64(double); __device__ __attribute__((const)) double __ocml_logb_f64(double); __device__ __attribute__((pure)) double __ocml_log_f64(double); __device__ double __ocml_modf_f64(double, __attribute__((address_space(5))) double *); __device__ __attribute__((const)) double __ocml_nearbyint_f64(double); __device__ __attribute__((const)) double __ocml_nextafter_f64(double, double); __device__ __attribute__((const)) double __ocml_len3_f64(double, double, double); __device__ __attribute__((const)) double __ocml_len4_f64(double, double, double, double); __device__ __attribute__((pure)) double __ocml_ncdf_f64(double); __device__ __attribute__((pure)) double __ocml_ncdfinv_f64(double); __device__ __attribute__((pure)) double __ocml_pow_f64(double, double); __device__ __attribute__((pure)) double __ocml_pown_f64(double, int); __device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); __device__ __attribute__((const)) double __ocml_remainder_f64(double, double); __device__ double __ocml_remquo_f64(double, double, __attribute__((address_space(5))) int *); __device__ __attribute__((const)) double __ocml_rhypot_f64(double, double); __device__ __attribute__((const)) double __ocml_rint_f64(double); __device__ 
__attribute__((const)) double __ocml_rlen3_f64(double, double, double); __device__ __attribute__((const)) double __ocml_rlen4_f64(double, double, double, double); __device__ __attribute__((const)) double __ocml_round_f64(double); __device__ __attribute__((pure)) double __ocml_rsqrt_f64(double); __device__ __attribute__((const)) double __ocml_scalb_f64(double, double); __device__ __attribute__((const)) double __ocml_scalbn_f64(double, int); __device__ __attribute__((const)) int __ocml_signbit_f64(double); __device__ double __ocml_sincos_f64(double, __attribute__((address_space(5))) double *); __device__ double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double *); __device__ double __ocml_sin_f64(double); __device__ __attribute__((pure)) double __ocml_sinh_f64(double); __device__ double __ocml_sinpi_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_f64(double); __device__ double __ocml_tan_f64(double); __device__ __attribute__((pure)) double __ocml_tanh_f64(double); __device__ double __ocml_tgamma_f64(double); __device__ __attribute__((const)) double __ocml_trunc_f64(double); __device__ double __ocml_y0_f64(double); __device__ double __ocml_y1_f64(double); // BEGIN INTRINSICS __device__ __attribute__((const)) double __ocml_add_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_add_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_add_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_add_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_sub_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rtn_f64(double, double); 
__device__ __attribute__((const)) double __ocml_mul_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_mul_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double); __device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double); __device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double); __device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtp_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtz_f64(double, double, double); __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); __device__ _Float16 __ocml_cos_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); __device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16); __device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16); __device__ __attribute__((const)) _Float16 
__ocml_fabs_f16(_Float16); __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); __device__ _Float16 __ocml_sin_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int); typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); typedef short __2i16 __attribute__((ext_vector_type(2))); // We need to match C99's bool and get an i1 in the IR. #ifdef __cplusplus typedef bool __ockl_bool; #else typedef _Bool __ockl_bool; #endif __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, __ockl_bool s); __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16); __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16); __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); #if 
HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 560 #define __DEPRECATED_SINCE_HIP_560(X) __attribute__((deprecated(X))) #else #define __DEPRECATED_SINCE_HIP_560(X) #endif // Deprecated, should be removed when rocm releases using it are no longer // relevant. __DEPRECATED_SINCE_HIP_560("use ((_Float16)1.0) / ") __device__ inline _Float16 __llvm_amdgcn_rcp_f16(_Float16 x) { return ((_Float16)1.0f) / x; } __DEPRECATED_SINCE_HIP_560("use ((__2f16)1.0) / ") __device__ inline __2f16 __llvm_amdgcn_rcp_2f16(__2f16 __x) { return ((__2f16)1.0f) / __x; } #undef __DEPRECATED_SINCE_HIP_560 __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); __device__ __2f16 __ocml_sin_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_pown_2f16(__2f16, __2i16); #ifdef __cplusplus } // extern "C" #endif #endif // __CLANG_HIP_LIBDEVICE_DECLARES_H__ /*===---- __clang_hip_math.h - Device-side HIP math support ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_MATH_H__ #define __CLANG_HIP_MATH_H__ #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." 
#endif #if !defined(__HIPCC_RTC__) #if defined(__cplusplus) #include #endif #include #include #ifdef __OPENMP_AMDGCN__ #include #endif #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") #ifdef __OPENMP_AMDGCN__ #define __DEVICE__ static inline __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif // A few functions return bool type starting only in C++11. #pragma push_macro("__RETURN_TYPE") #ifdef __OPENMP_AMDGCN__ #define __RETURN_TYPE int #else #if defined(__cplusplus) #define __RETURN_TYPE bool #else #define __RETURN_TYPE int #endif #endif // __OPENMP_AMDGCN__ #if defined (__cplusplus) && __cplusplus < 201103L // emulate static_assert on type sizes template struct __compare_result{}; template<> struct __compare_result { static const __device__ bool valid; }; __DEVICE__ void __suppress_unused_warning(bool b){}; template __DEVICE__ void __static_assert_equal_size() { __suppress_unused_warning(__compare_result::valid); } #define __static_assert_type_size_equal(A, B) \ __static_assert_equal_size() #else #define __static_assert_type_size_equal(A,B) \ static_assert((A) == (B), "") #endif __DEVICE__ uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '7') __r = (__r * 8u) + __tmp - '0'; else return 0; ++__tagp; } return __r; } __DEVICE__ uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '9') __r = (__r * 10u) + __tmp - '0'; else return 0; ++__tagp; } return __r; } __DEVICE__ uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) { uint64_t __r = 0; while (*__tagp != '\0') { char __tmp = *__tagp; if (__tmp >= '0' && __tmp <= '9') __r = (__r * 16u) + __tmp - '0'; else if (__tmp >= 'a' && __tmp <= 'f') __r = (__r * 16u) + 
__tmp - 'a' + 10; else if (__tmp >= 'A' && __tmp <= 'F') __r = (__r * 16u) + __tmp - 'A' + 10; else return 0; ++__tagp; } return __r; } __DEVICE__ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) { if (*__tagp == '0') { ++__tagp; if (*__tagp == 'x' || *__tagp == 'X') return __make_mantissa_base16(__tagp); else return __make_mantissa_base8(__tagp); } return __make_mantissa_base10(__tagp); } // BEGIN FLOAT #if defined(__cplusplus) __DEVICE__ int abs(int __x) { int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } __DEVICE__ long labs(long __x) { long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } __DEVICE__ long long llabs(long long __x) { long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } #endif __DEVICE__ float acosf(float __x) { return __ocml_acos_f32(__x); } __DEVICE__ float acoshf(float __x) { return __ocml_acosh_f32(__x); } __DEVICE__ float asinf(float __x) { return __ocml_asin_f32(__x); } __DEVICE__ float asinhf(float __x) { return __ocml_asinh_f32(__x); } __DEVICE__ float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } __DEVICE__ float atanf(float __x) { return __ocml_atan_f32(__x); } __DEVICE__ float atanhf(float __x) { return __ocml_atanh_f32(__x); } __DEVICE__ float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } __DEVICE__ float ceilf(float __x) { return __builtin_ceilf(__x); } __DEVICE__ float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); } __DEVICE__ float cosf(float __x) { return __ocml_cos_f32(__x); } __DEVICE__ float coshf(float __x) { return __ocml_cosh_f32(__x); } __DEVICE__ float cospif(float __x) { return __ocml_cospi_f32(__x); } __DEVICE__ float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } __DEVICE__ float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } __DEVICE__ float erfcf(float __x) { return __ocml_erfc_f32(__x); } __DEVICE__ float erfcinvf(float __x) { 
return __ocml_erfcinv_f32(__x); } __DEVICE__ float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } __DEVICE__ float erff(float __x) { return __ocml_erf_f32(__x); } __DEVICE__ float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } __DEVICE__ float exp10f(float __x) { return __ocml_exp10_f32(__x); } __DEVICE__ float exp2f(float __x) { return __builtin_exp2f(__x); } __DEVICE__ float expf(float __x) { return __builtin_expf(__x); } __DEVICE__ float expm1f(float __x) { return __ocml_expm1_f32(__x); } __DEVICE__ float fabsf(float __x) { return __builtin_fabsf(__x); } __DEVICE__ float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } __DEVICE__ float fdividef(float __x, float __y) { return __x / __y; } __DEVICE__ float floorf(float __x) { return __builtin_floorf(__x); } __DEVICE__ float fmaf(float __x, float __y, float __z) { return __builtin_fmaf(__x, __y, __z); } __DEVICE__ float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); } __DEVICE__ float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); } __DEVICE__ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } __DEVICE__ float frexpf(float __x, int *__nptr) { return __builtin_frexpf(__x, __nptr); } __DEVICE__ float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } __DEVICE__ int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } __DEVICE__ __RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); } __DEVICE__ __RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); } __DEVICE__ __RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); } __DEVICE__ float j0f(float __x) { return __ocml_j0_f32(__x); } __DEVICE__ float j1f(float __x) { return __ocml_j1_f32(__x); } __DEVICE__ float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. 
if (__n == 0) return j0f(__x); if (__n == 1) return j1f(__x); float __x0 = j0f(__x); float __x1 = j1f(__x); for (int __i = 1; __i < __n; ++__i) { float __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } __DEVICE__ float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); } __DEVICE__ float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } __DEVICE__ long long int llrintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ long long int llroundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float log10f(float __x) { return __builtin_log10f(__x); } __DEVICE__ float log1pf(float __x) { return __ocml_log1p_f32(__x); } __DEVICE__ float log2f(float __x) { return __builtin_log2f(__x); } __DEVICE__ float logbf(float __x) { return __ocml_logb_f32(__x); } __DEVICE__ float logf(float __x) { return __builtin_logf(__x); } __DEVICE__ long int lrintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ long int lroundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float modff(float __x, float *__iptr) { float __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; return __r; } __DEVICE__ float nanf(const char *__tagp __attribute__((nonnull))) { union { float val; struct ieee_float { unsigned int mantissa : 22; unsigned int quiet : 1; unsigned int exponent : 8; unsigned int sign : 1; } bits; } __tmp; __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); __tmp.bits.sign = 0u; __tmp.bits.exponent = ~0u; __tmp.bits.quiet = 1u; __tmp.bits.mantissa = __make_mantissa(__tagp); return __tmp.val; } __DEVICE__ float nearbyintf(float __x) { return __builtin_nearbyintf(__x); } __DEVICE__ float nextafterf(float __x, float __y) { return __ocml_nextafter_f32(__x, __y); } __DEVICE__ float norm3df(float __x, float __y, float __z) { return __ocml_len3_f32(__x, 
__y, __z); } __DEVICE__ float norm4df(float __x, float __y, float __z, float __w) { return __ocml_len4_f32(__x, __y, __z, __w); } __DEVICE__ float normcdff(float __x) { return __ocml_ncdf_f32(__x); } __DEVICE__ float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } __DEVICE__ float normf(int __dim, const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __ocml_sqrt_f32(__r); } __DEVICE__ float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } __DEVICE__ float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } __DEVICE__ float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } __DEVICE__ float remainderf(float __x, float __y) { return __ocml_remainder_f32(__x, __y); } __DEVICE__ float remquof(float __x, float __y, int *__quo) { int __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; return __r; } __DEVICE__ float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } __DEVICE__ float rintf(float __x) { return __builtin_rintf(__x); } __DEVICE__ float rnorm3df(float __x, float __y, float __z) { return __ocml_rlen3_f32(__x, __y, __z); } __DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w) { return __ocml_rlen4_f32(__x, __y, __z, __w); } __DEVICE__ float rnormf(int __dim, const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __ocml_rsqrt_f32(__r); } __DEVICE__ float roundf(float __x) { return __builtin_roundf(__x); } __DEVICE__ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } __DEVICE__ float scalblnf(float __x, long int __n) { return (__n < INT_MAX) ? 
__builtin_amdgcn_ldexpf(__x, __n) : __ocml_scalb_f32(__x, __n); } __DEVICE__ float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); } __DEVICE__ __RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); } __DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr) { float __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } __DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr) { float __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } __DEVICE__ float sinf(float __x) { return __ocml_sin_f32(__x); } __DEVICE__ float sinhf(float __x) { return __ocml_sinh_f32(__x); } __DEVICE__ float sinpif(float __x) { return __ocml_sinpi_f32(__x); } __DEVICE__ float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } __DEVICE__ float tanf(float __x) { return __ocml_tan_f32(__x); } __DEVICE__ float tanhf(float __x) { return __ocml_tanh_f32(__x); } __DEVICE__ float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } __DEVICE__ float truncf(float __x) { return __builtin_truncf(__x); } __DEVICE__ float y0f(float __x) { return __ocml_y0_f32(__x); } __DEVICE__ float y1f(float __x) { return __ocml_y1_f32(__x); } __DEVICE__ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
if (__n == 0) return y0f(__x); if (__n == 1) return y1f(__x); float __x0 = y0f(__x); float __x1 = y1f(__x); for (int __i = 1; __i < __n; ++__i) { float __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } // BEGIN INTRINSICS __DEVICE__ float __cosf(float __x) { return __ocml_native_cos_f32(__x); } __DEVICE__ float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } __DEVICE__ float __expf(float __x) { return __ocml_native_exp_f32(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } __DEVICE__ float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } __DEVICE__ float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } __DEVICE__ float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } #else __DEVICE__ float __fadd_rn(float __x, float __y) { return __x + __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } __DEVICE__ float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } __DEVICE__ float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } __DEVICE__ float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } #else __DEVICE__ float __fdiv_rn(float __x, float __y) { return __x / __y; } #endif __DEVICE__ float __fdividef(float __x, float __y) { return __x / __y; } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fmaf_rd(float __x, float __y, float __z) { return __ocml_fma_rtn_f32(__x, __y, __z); } __DEVICE__ float __fmaf_rn(float __x, float __y, float __z) { return __ocml_fma_rte_f32(__x, __y, __z); } __DEVICE__ float __fmaf_ru(float __x, float __y, float __z) { return __ocml_fma_rtp_f32(__x, __y, __z); } __DEVICE__ float __fmaf_rz(float __x, float __y, float __z) { return __ocml_fma_rtz_f32(__x, __y, __z); } #else 
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z) { return __builtin_fmaf(__x, __y, __z); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } __DEVICE__ float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } __DEVICE__ float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } __DEVICE__ float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } #else __DEVICE__ float __fmul_rn(float __x, float __y) { return __x * __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } __DEVICE__ float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } __DEVICE__ float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } __DEVICE__ float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } #else __DEVICE__ float __frcp_rn(float __x) { return 1.0f / __x; } #endif __DEVICE__ float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } __DEVICE__ float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } __DEVICE__ float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } __DEVICE__ float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } #else __DEVICE__ float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } __DEVICE__ float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } __DEVICE__ float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } __DEVICE__ float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } #else __DEVICE__ float __fsub_rn(float __x, float 
__y) { return __x - __y; } #endif __DEVICE__ float __log10f(float __x) { return __ocml_native_log10_f32(__x); } __DEVICE__ float __log2f(float __x) { return __ocml_native_log2_f32(__x); } __DEVICE__ float __logf(float __x) { return __ocml_native_log_f32(__x); } __DEVICE__ float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } __DEVICE__ float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } __DEVICE__ void __sincosf(float __x, float *__sinptr, float *__cosptr) { *__sinptr = __ocml_native_sin_f32(__x); *__cosptr = __ocml_native_cos_f32(__x); } __DEVICE__ float __sinf(float __x) { return __ocml_native_sin_f32(__x); } __DEVICE__ float __tanf(float __x) { return __ocml_tan_f32(__x); } // END INTRINSICS // END FLOAT // BEGIN DOUBLE __DEVICE__ double acos(double __x) { return __ocml_acos_f64(__x); } __DEVICE__ double acosh(double __x) { return __ocml_acosh_f64(__x); } __DEVICE__ double asin(double __x) { return __ocml_asin_f64(__x); } __DEVICE__ double asinh(double __x) { return __ocml_asinh_f64(__x); } __DEVICE__ double atan(double __x) { return __ocml_atan_f64(__x); } __DEVICE__ double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } __DEVICE__ double atanh(double __x) { return __ocml_atanh_f64(__x); } __DEVICE__ double cbrt(double __x) { return __ocml_cbrt_f64(__x); } __DEVICE__ double ceil(double __x) { return __builtin_ceil(__x); } __DEVICE__ double copysign(double __x, double __y) { return __builtin_copysign(__x, __y); } __DEVICE__ double cos(double __x) { return __ocml_cos_f64(__x); } __DEVICE__ double cosh(double __x) { return __ocml_cosh_f64(__x); } __DEVICE__ double cospi(double __x) { return __ocml_cospi_f64(__x); } __DEVICE__ double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } __DEVICE__ double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } __DEVICE__ double erf(double __x) { return __ocml_erf_f64(__x); } __DEVICE__ double erfc(double __x) { return __ocml_erfc_f64(__x); } 
__DEVICE__ double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } __DEVICE__ double erfcx(double __x) { return __ocml_erfcx_f64(__x); } __DEVICE__ double erfinv(double __x) { return __ocml_erfinv_f64(__x); } __DEVICE__ double exp(double __x) { return __ocml_exp_f64(__x); } __DEVICE__ double exp10(double __x) { return __ocml_exp10_f64(__x); } __DEVICE__ double exp2(double __x) { return __ocml_exp2_f64(__x); } __DEVICE__ double expm1(double __x) { return __ocml_expm1_f64(__x); } __DEVICE__ double fabs(double __x) { return __builtin_fabs(__x); } __DEVICE__ double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } __DEVICE__ double floor(double __x) { return __builtin_floor(__x); } __DEVICE__ double fma(double __x, double __y, double __z) { return __builtin_fma(__x, __y, __z); } __DEVICE__ double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); } __DEVICE__ double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); } __DEVICE__ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } __DEVICE__ double frexp(double __x, int *__nptr) { return __builtin_frexp(__x, __nptr); } __DEVICE__ double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } __DEVICE__ int ilogb(double __x) { return __ocml_ilogb_f64(__x); } __DEVICE__ __RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); } __DEVICE__ __RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); } __DEVICE__ __RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); } __DEVICE__ double j0(double __x) { return __ocml_j0_f64(__x); } __DEVICE__ double j1(double __x) { return __ocml_j1_f64(__x); } __DEVICE__ double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
if (__n == 0) return j0(__x); if (__n == 1) return j1(__x); double __x0 = j0(__x); double __x1 = j1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } __DEVICE__ double ldexp(double __x, int __e) { return __builtin_amdgcn_ldexp(__x, __e); } __DEVICE__ double lgamma(double __x) { return __ocml_lgamma_f64(__x); } __DEVICE__ long long int llrint(double __x) { return __builtin_rint(__x); } __DEVICE__ long long int llround(double __x) { return __builtin_round(__x); } __DEVICE__ double log(double __x) { return __ocml_log_f64(__x); } __DEVICE__ double log10(double __x) { return __ocml_log10_f64(__x); } __DEVICE__ double log1p(double __x) { return __ocml_log1p_f64(__x); } __DEVICE__ double log2(double __x) { return __ocml_log2_f64(__x); } __DEVICE__ double logb(double __x) { return __ocml_logb_f64(__x); } __DEVICE__ long int lrint(double __x) { return __builtin_rint(__x); } __DEVICE__ long int lround(double __x) { return __builtin_round(__x); } __DEVICE__ double modf(double __x, double *__iptr) { double __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif double __r = __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); *__iptr = __tmp; return __r; } __DEVICE__ double nan(const char *__tagp) { #if !_WIN32 union { double val; struct ieee_double { uint64_t mantissa : 51; uint32_t quiet : 1; uint32_t exponent : 11; uint32_t sign : 1; } bits; } __tmp; __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); __tmp.bits.sign = 0u; __tmp.bits.exponent = ~0u; __tmp.bits.quiet = 1u; __tmp.bits.mantissa = __make_mantissa(__tagp); return __tmp.val; #else __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double)); uint64_t __val = __make_mantissa(__tagp); __val |= 0xFFF << 51; return *reinterpret_cast(&__val); #endif } __DEVICE__ double nearbyint(double __x) { return __builtin_nearbyint(__x); } __DEVICE__ double 
nextafter(double __x, double __y) { return __ocml_nextafter_f64(__x, __y); } __DEVICE__ double norm(int __dim, const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __ocml_sqrt_f64(__r); } __DEVICE__ double norm3d(double __x, double __y, double __z) { return __ocml_len3_f64(__x, __y, __z); } __DEVICE__ double norm4d(double __x, double __y, double __z, double __w) { return __ocml_len4_f64(__x, __y, __z, __w); } __DEVICE__ double normcdf(double __x) { return __ocml_ncdf_f64(__x); } __DEVICE__ double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } __DEVICE__ double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } __DEVICE__ double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } __DEVICE__ double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } __DEVICE__ double remainder(double __x, double __y) { return __ocml_remainder_f64(__x, __y); } __DEVICE__ double remquo(double __x, double __y, int *__quo) { int __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; return __r; } __DEVICE__ double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } __DEVICE__ double rint(double __x) { return __builtin_rint(__x); } __DEVICE__ double rnorm(int __dim, const double *__a) { // TODO: placeholder until OCML adds support. 
double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; ++__a; } return __ocml_rsqrt_f64(__r); } __DEVICE__ double rnorm3d(double __x, double __y, double __z) { return __ocml_rlen3_f64(__x, __y, __z); } __DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w) { return __ocml_rlen4_f64(__x, __y, __z, __w); } __DEVICE__ double round(double __x) { return __builtin_round(__x); } __DEVICE__ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } __DEVICE__ double scalbln(double __x, long int __n) { return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n) : __ocml_scalb_f64(__x, __n); } __DEVICE__ double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); } __DEVICE__ __RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); } __DEVICE__ double sin(double __x) { return __ocml_sin_f64(__x); } __DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr) { double __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } __DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr) { double __tmp; #ifdef __OPENMP_AMDGCN__ #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) #endif *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } __DEVICE__ double sinh(double __x) { return __ocml_sinh_f64(__x); } __DEVICE__ double sinpi(double __x) { return __ocml_sinpi_f64(__x); } __DEVICE__ double sqrt(double __x) { return __ocml_sqrt_f64(__x); } __DEVICE__ double tan(double __x) { return __ocml_tan_f64(__x); } __DEVICE__ double tanh(double __x) { return __ocml_tanh_f64(__x); } __DEVICE__ double tgamma(double __x) { return __ocml_tgamma_f64(__x); } __DEVICE__ double trunc(double __x) { return __builtin_trunc(__x); } __DEVICE__ double y0(double __x) { return __ocml_y0_f64(__x); } __DEVICE__ double 
y1(double __x) { return __ocml_y1_f64(__x); } __DEVICE__ double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. if (__n == 0) return y0(__x); if (__n == 1) return y1(__x); double __x0 = y0(__x); double __x1 = y1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } return __x1; } // BEGIN INTRINSICS #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dadd_rd(double __x, double __y) { return __ocml_add_rtn_f64(__x, __y); } __DEVICE__ double __dadd_rn(double __x, double __y) { return __ocml_add_rte_f64(__x, __y); } __DEVICE__ double __dadd_ru(double __x, double __y) { return __ocml_add_rtp_f64(__x, __y); } __DEVICE__ double __dadd_rz(double __x, double __y) { return __ocml_add_rtz_f64(__x, __y); } #else __DEVICE__ double __dadd_rn(double __x, double __y) { return __x + __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __ddiv_rd(double __x, double __y) { return __ocml_div_rtn_f64(__x, __y); } __DEVICE__ double __ddiv_rn(double __x, double __y) { return __ocml_div_rte_f64(__x, __y); } __DEVICE__ double __ddiv_ru(double __x, double __y) { return __ocml_div_rtp_f64(__x, __y); } __DEVICE__ double __ddiv_rz(double __x, double __y) { return __ocml_div_rtz_f64(__x, __y); } #else __DEVICE__ double __ddiv_rn(double __x, double __y) { return __x / __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dmul_rd(double __x, double __y) { return __ocml_mul_rtn_f64(__x, __y); } __DEVICE__ double __dmul_rn(double __x, double __y) { return __ocml_mul_rte_f64(__x, __y); } __DEVICE__ double __dmul_ru(double __x, double __y) { return __ocml_mul_rtp_f64(__x, __y); } __DEVICE__ double __dmul_rz(double __x, double __y) { return __ocml_mul_rtz_f64(__x, __y); } #else __DEVICE__ 
double __dmul_rn(double __x, double __y) { return __x * __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); } __DEVICE__ double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); } __DEVICE__ double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); } __DEVICE__ double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); } #else __DEVICE__ double __drcp_rn(double __x) { return 1.0 / __x; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } __DEVICE__ double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } __DEVICE__ double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } __DEVICE__ double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } #else __DEVICE__ double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __dsub_rd(double __x, double __y) { return __ocml_sub_rtn_f64(__x, __y); } __DEVICE__ double __dsub_rn(double __x, double __y) { return __ocml_sub_rte_f64(__x, __y); } __DEVICE__ double __dsub_ru(double __x, double __y) { return __ocml_sub_rtp_f64(__x, __y); } __DEVICE__ double __dsub_rz(double __x, double __y) { return __ocml_sub_rtz_f64(__x, __y); } #else __DEVICE__ double __dsub_rn(double __x, double __y) { return __x - __y; } #endif #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ double __fma_rd(double __x, double __y, double __z) { return __ocml_fma_rtn_f64(__x, __y, __z); } __DEVICE__ double __fma_rn(double __x, double __y, double __z) { return __ocml_fma_rte_f64(__x, __y, __z); } __DEVICE__ double __fma_ru(double __x, double __y, double __z) { return __ocml_fma_rtp_f64(__x, __y, __z); } __DEVICE__ double __fma_rz(double __x, double __y, double __z) { return __ocml_fma_rtz_f64(__x, __y, __z); } #else __DEVICE__ double __fma_rn(double __x, double __y, double __z) 
{ return __builtin_fma(__x, __y, __z); } #endif // END INTRINSICS // END DOUBLE // C only macros #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x) #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x) #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x) #define signbit(__x) \ _Generic((__x), float : __signbitf, double : __signbit)(__x) #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L #if defined(__cplusplus) template __DEVICE__ T min(T __arg1, T __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } template __DEVICE__ T max(T __arg1, T __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } __DEVICE__ int min(int __arg1, int __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } __DEVICE__ int max(int __arg1, int __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } __DEVICE__ float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); } __DEVICE__ double max(double __x, double __y) { return __builtin_fmax(__x, __y); } __DEVICE__ float min(float __x, float __y) { return __builtin_fminf(__x, __y); } __DEVICE__ double min(double __x, double __y) { return __builtin_fmin(__x, __y); } #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) __host__ inline static int min(int __arg1, int __arg2) { return std::min(__arg1, __arg2); } __host__ inline static int max(int __arg1, int __arg2) { return std::max(__arg1, __arg2); } #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) #endif #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") #endif // __CLANG_HIP_MATH_H__ /builtins/__clang_hip_runtime_wrapper.h/*===---- __clang_hip_runtime_wrapper.h - HIP runtime support ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* * WARNING: This header is intended to be directly -include'd by * the compiler and is not supposed to be included by users. * */ #ifndef __CLANG_HIP_RUNTIME_WRAPPER_H__ #define __CLANG_HIP_RUNTIME_WRAPPER_H__ #if __HIP__ #define __host__ __attribute__((host)) #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) #define __shared__ __attribute__((shared)) #define __constant__ __attribute__((constant)) #define __managed__ __attribute__((managed)) #if !defined(__cplusplus) || __cplusplus < 201103L #define nullptr NULL; #endif #ifdef __cplusplus extern "C" { __attribute__((__visibility__("default"))) __attribute__((weak)) __attribute__((noreturn)) __device__ void __cxa_pure_virtual(void) { __builtin_trap(); } __attribute__((__visibility__("default"))) __attribute__((weak)) __attribute__((noreturn)) __device__ void __cxa_deleted_virtual(void) { __builtin_trap(); } } #endif //__cplusplus #if !defined(__HIPCC_RTC__) #include #include #include #if __has_include("hip/hip_version.h") #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") #else typedef __SIZE_TYPE__ size_t; // Define macros which are needed to declare HIP device API's without standard // C/C++ headers. This is for readability so that these API's can be written // the same way as non-hipRTC use case. These macros need to be popped so that // they do not pollute users' name space. 
#pragma push_macro("NULL") #pragma push_macro("uint32_t") #pragma push_macro("uint64_t") #pragma push_macro("CHAR_BIT") #pragma push_macro("INT_MAX") #define NULL (void *)0 #define uint32_t __UINT32_TYPE__ #define uint64_t __UINT64_TYPE__ #define CHAR_BIT __CHAR_BIT__ #define INT_MAX __INTMAX_MAX__ #endif // __HIPCC_RTC__ typedef __SIZE_TYPE__ __hip_size_t; #ifdef __cplusplus extern "C" { #endif //__cplusplus #if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405 extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr); #if __has_feature(address_sanitizer) extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc); extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc); __attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) { unsigned long long __pc = (unsigned long long)__builtin_return_address(0); return (void *)__asan_malloc_impl(__size, __pc); } __attribute__((noinline, weak)) __device__ void free(void *__ptr) { unsigned long long __pc = (unsigned long long)__builtin_return_address(0); __asan_free_impl((unsigned long long)__ptr, __pc); } #else __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { return (void *) __ockl_dm_alloc(__size); } __attribute__((weak)) inline __device__ void free(void *__ptr) { __ockl_dm_dealloc((unsigned long long)__ptr); } #endif // __has_feature(address_sanitizer) #else // HIP version check #if __HIP_ENABLE_DEVICE_MALLOC__ __device__ void *__hip_malloc(__hip_size_t __size); __device__ void *__hip_free(void *__ptr); __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { return __hip_malloc(__size); } __attribute__((weak)) inline __device__ void free(void *__ptr) { __hip_free(__ptr); } #else __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { 
__builtin_trap(); return (void *)0; } __attribute__((weak)) inline __device__ void free(void *__ptr) { __builtin_trap(); } #endif #endif // HIP version check #ifdef __cplusplus } // extern "C" #endif //__cplusplus #include <__clang_hip_libdevice_declares.h> #include <__clang_hip_math.h> #include <__clang_hip_stdlib.h> #if defined(__HIPCC_RTC__) #include <__clang_hip_cmath.h> #else #include <__clang_cuda_math_forward_declares.h> #include <__clang_hip_cmath.h> #include <__clang_cuda_complex_builtins.h> #include #include #include #endif // __HIPCC_RTC__ #define __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ 1 #if defined(__HIPCC_RTC__) #pragma pop_macro("NULL") #pragma pop_macro("uint32_t") #pragma pop_macro("uint64_t") #pragma pop_macro("CHAR_BIT") #pragma pop_macro("INT_MAX") #endif // __HIPCC_RTC__ #endif // __HIP__ #endif // __CLANG_HIP_RUNTIME_WRAPPER_H__ /builtins/__clang_hip_stdlib.h/*===---- __clang_hip_stdlib.h - Device-side HIP math support --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_HIP_STDLIB_H__ #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." 
#endif #if !defined(__cplusplus) #include #ifdef __OPENMP_AMDGCN__ #define __DEVICE__ static inline __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) #endif __DEVICE__ int abs(int __x) { int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } __DEVICE__ long labs(long __x) { long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } __DEVICE__ long long llabs(long long __x) { long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } #endif // !defined(__cplusplus) #endif // #define __CLANG_HIP_STDLIB_H__ /builtins/__stddef_max_align_t.h/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_MAX_ALIGN_T_DEFINED #define __CLANG_MAX_ALIGN_T_DEFINED #if defined(_MSC_VER) typedef double max_align_t; #elif defined(__APPLE__) typedef long double max_align_t; #else // Define 'max_align_t' to match the GCC definition. typedef struct { long long __clang_max_align_nonce1 __attribute__((__aligned__(__alignof__(long long)))); long double __clang_max_align_nonce2 __attribute__((__aligned__(__alignof__(long double)))); } max_align_t; #endif #endif /*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __WMMINTRIN_H #error "Never use <__wmmintrin_aes.h> directly; include instead." 
#endif #ifndef __WMMINTRIN_AES_H #define __WMMINTRIN_AES_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128))) /// Performs a single round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESENC instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the encrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenc_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R); } /// Performs the final round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESENCLAST instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the encrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesenclast_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R); } /// Performs a single round of AES decryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. 
/// /// \headerfile /// /// This intrinsic corresponds to the VAESDEC instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the decrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdec_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R); } /// Performs the final round of AES decryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source /// operand using a 128-bit round key value contained in the second source /// operand, and writes the result to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESDECLAST instruction. /// /// \param __V /// A 128-bit integer vector containing the state value. /// \param __R /// A 128-bit integer vector containing the round key value. /// \returns A 128-bit integer vector containing the decrypted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesdeclast_si128(__m128i __V, __m128i __R) { return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R); } /// Applies the AES InvMixColumns() transformation to an expanded key /// contained in the source operand, and writes the result to the /// destination. /// /// \headerfile /// /// This intrinsic corresponds to the VAESIMC instruction. /// /// \param __V /// A 128-bit integer vector containing the expanded key. /// \returns A 128-bit integer vector containing the transformed value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_aesimc_si128(__m128i __V) { return (__m128i)__builtin_ia32_aesimc128((__v2di)__V); } /// Generates a round key for AES encryption, operating on 128-bit data /// specified in the first source operand and using an 8-bit round constant /// specified by the second source operand, and writes the result to the /// destination. 
/// /// \headerfile /// /// \code /// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R); /// \endcode /// /// This intrinsic corresponds to the AESKEYGENASSIST instruction. /// /// \param C /// A 128-bit integer vector that is used to generate the AES encryption key. /// \param R /// An 8-bit round constant used to generate the AES encryption key. /// \returns A 128-bit round key for AES encryption. #define _mm_aeskeygenassist_si128(C, R) \ ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))) #undef __DEFAULT_FN_ATTRS #endif /* __WMMINTRIN_AES_H */ /builtins/__wmmintrin_pclmul.h/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __WMMINTRIN_H #error "Never use <__wmmintrin_pclmul.h> directly; include instead." #endif #ifndef __WMMINTRIN_PCLMUL_H #define __WMMINTRIN_PCLMUL_H /// Multiplies two 64-bit integer values, which are selected from source /// operands using the immediate-value operand. The multiplication is a /// carry-less multiplication, and the 128-bit integer product is stored in /// the destination. /// /// \headerfile /// /// \code /// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I); /// \endcode /// /// This intrinsic corresponds to the VPCLMULQDQ instruction. /// /// \param X /// A 128-bit vector of [2 x i64] containing one of the source operands. /// \param Y /// A 128-bit vector of [2 x i64] containing one of the source operands. /// \param I /// An immediate value specifying which 64-bit values to select from the /// operands. 
Bit 0 is used to select a value from operand \a X, and bit /// 4 is used to select a value from operand \a Y: \n /// Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n /// Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n /// Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n /// Bit[4]=1 indicates that bits[127:64] of operand \a Y are used. /// \returns The 128-bit integer vector containing the result of the carry-less /// multiplication of the selected 64-bit values. #define _mm_clmulepi64_si128(X, Y, I) \ ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (char)(I))) #endif /* __WMMINTRIN_PCLMUL_H */ /*===---- adxintrin.h - ADX intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __ADXINTRIN_H #define __ADXINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) /* Use C++ inline semantics in C++, GNU inline for C mode. */ #if defined(__cplusplus) #define __INLINE __inline #else #define __INLINE static __inline #endif #if defined(__cplusplus) extern "C" { #endif /* Intrinsics that are available only if __ADX__ is defined. */ /// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 
0 : 1 /// Store32(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADCX instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 32-bit unsigned addend. /// \param __y /// A 32-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx"))) _addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); } #ifdef __x86_64__ /// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store64(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADCX instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 64-bit unsigned addend. /// \param __y /// A 64-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx"))) _addcarryx_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); } #endif /* Intrinsics that are also available if __ADX__ is undefined. */ /// Adds unsigned 32-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 32-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). 
/// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store32(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADC instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 32-bit unsigned addend. /// \param __y /// A 32-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p); } #ifdef __x86_64__ /// Adds unsigned 64-bit integers \a __x and \a __y, plus 0 or 1 as indicated /// by the carry flag \a __cf. Stores the unsigned 64-bit sum in the memory /// at \a __p, and returns the 8-bit carry-out (carry flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store64(__p, __x + __y + temp) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADC instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// A 64-bit unsigned addend. /// \param __y /// A 64-bit unsigned addend. /// \param __p /// Pointer to memory for storing the sum. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _addcarry_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p); } #endif /// Adds unsigned 32-bit integer \a __y to 0 or 1 as indicated by the carry /// flag \a __cf, and subtracts the result from unsigned 32-bit integer /// \a __x. Stores the unsigned 32-bit difference in the memory at \a __p, /// and returns the 8-bit carry-out (carry or overflow flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 
0 : 1 /// Store32(__p, __x - (__y + temp)) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c SBB instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// The 32-bit unsigned minuend. /// \param __y /// The 32-bit unsigned subtrahend. /// \param __p /// Pointer to memory for storing the difference. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p); } #ifdef __x86_64__ /// Adds unsigned 64-bit integer \a __y to 0 or 1 as indicated by the carry /// flag \a __cf, and subtracts the result from unsigned 64-bit integer /// \a __x. Stores the unsigned 64-bit difference in the memory at \a __p, /// and returns the 8-bit carry-out (carry or overflow flag). /// /// \code{.operation} /// temp := (__cf == 0) ? 0 : 1 /// Store64(__p, __x - (__y + temp)) /// result := CF /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c ADC instruction. /// /// \param __cf /// The 8-bit unsigned carry flag; any non-zero value indicates carry. /// \param __x /// The 64-bit unsigned minuend. /// \param __y /// The 64-bit unsigned subtrahend. /// \param __p /// Pointer to memory for storing the difference. /// \returns The 8-bit unsigned carry-out value. __INLINE unsigned char __DEFAULT_FN_ATTRS _subborrow_u64(unsigned char __cf, unsigned long long __x, unsigned long long __y, unsigned long long *__p) { return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p); } #endif #if defined(__cplusplus) } #endif #undef __DEFAULT_FN_ATTRS #endif /* __ADXINTRIN_H */ /*===---- ammintrin.h - SSE4a intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __AMMINTRIN_H #define __AMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128))) /// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index \a idx and of the length \a len. /// /// \headerfile /// /// \code /// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx); /// \endcode /// /// This intrinsic corresponds to the EXTRQ instruction. /// /// \param x /// The value from which bits are extracted. /// \param len /// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0] /// are zero, the length is interpreted as 64. /// \param idx /// Bits [5:0] specify the index of the least significant bit; the other /// bits are ignored. If the sum of the index and length is greater than 64, /// the result is undefined. If the length and index are both zero, bits /// [63:0] of parameter \a x are extracted. If the length is zero but the /// index is non-zero, the result is undefined. /// \returns A 128-bit integer vector whose lower 64 bits contain the bits /// extracted from the source operand. #define _mm_extracti_si64(x, len, idx) \ ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \ (char)(len), (char)(idx))) /// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index and of the length specified by /// \a __y. /// /// \headerfile /// /// This intrinsic corresponds to the EXTRQ instruction. /// /// \param __x /// The value from which bits are extracted. 
/// \param __y /// Specifies the index of the least significant bit at [13:8] and the /// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the /// length is interpreted as 64. If the sum of the index and length is /// greater than 64, the result is undefined. If the length and index are /// both zero, bits [63:0] of parameter \a __x are extracted. If the length /// is zero but the index is non-zero, the result is undefined. /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted /// from the source operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_extract_si64(__m128i __x, __m128i __y) { return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); } /// Inserts bits of a specified length from the source integer vector /// \a y into the lower 64 bits of the destination integer vector \a x at /// the index \a idx and of the length \a len. /// /// \headerfile /// /// \code /// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len, /// const int idx); /// \endcode /// /// This intrinsic corresponds to the INSERTQ instruction. /// /// \param x /// The destination operand where bits will be inserted. The inserted bits /// are defined by the length \a len and by the index \a idx specifying the /// least significant bit. /// \param y /// The source operand containing the bits to be extracted. The extracted /// bits are the least significant bits of operand \a y of length \a len. /// \param len /// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0] /// are zero, the length is interpreted as 64. /// \param idx /// Bits [5:0] specify the index of the least significant bit; the other /// bits are ignored. If the sum of the index and length is greater than 64, /// the result is undefined. If the length and index are both zero, bits /// [63:0] of parameter \a y are inserted into parameter \a x. If the length /// is zero but the index is non-zero, the result is undefined. 
/// \returns A 128-bit integer vector containing the original lower 64-bits of /// destination operand \a x with the specified bitfields replaced by the /// lower bits of source operand \a y. The upper 64 bits of the return value /// are undefined. #define _mm_inserti_si64(x, y, len, idx) \ ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \ (__v2di)(__m128i)(y), \ (char)(len), (char)(idx))) /// Inserts bits of a specified length from the source integer vector /// \a __y into the lower 64 bits of the destination integer vector \a __x /// at the index and of the length specified by \a __y. /// /// \headerfile /// /// This intrinsic corresponds to the INSERTQ instruction. /// /// \param __x /// The destination operand where bits will be inserted. The inserted bits /// are defined by the length and by the index of the least significant bit /// specified by operand \a __y. /// \param __y /// The source operand containing the bits to be extracted. The extracted /// bits are the least significant bits of operand \a __y with length /// specified by bits [69:64]. These are inserted into the destination at the /// index specified by bits [77:72]; all other bits are ignored. If bits /// [69:64] are zero, the length is interpreted as 64. If the sum of the /// index and length is greater than 64, the result is undefined. If the /// length and index are both zero, bits [63:0] of parameter \a __y are /// inserted into parameter \a __x. If the length is zero but the index is /// non-zero, the result is undefined. /// \returns A 128-bit integer vector containing the original lower 64-bits of /// destination operand \a __x with the specified bitfields replaced by the /// lower bits of source operand \a __y. The upper 64 bits of the return /// value are undefined. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_si64(__m128i __x, __m128i __y) { return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); } /// Stores a 64-bit double-precision value in a 64-bit memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTSD instruction. /// /// \param __p /// The 64-bit memory location used to store the register value. /// \param __a /// The 64-bit double-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_sd(double *__p, __m128d __a) { __builtin_ia32_movntsd(__p, (__v2df)__a); } /// Stores a 32-bit single-precision floating-point value in a 32-bit /// memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTSS instruction. /// /// \param __p /// The 32-bit memory location used to store the register value. /// \param __a /// The 32-bit single-precision floating-point register value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ss(float *__p, __m128 __a) { __builtin_ia32_movntss(__p, (__v4sf)__a); } #undef __DEFAULT_FN_ATTRS #endif /* __AMMINTRIN_H */ /*===--------- amxcomplexintrin.h - AMXCOMPLEX intrinsics -*- C++ -*---------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===------------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." 
#endif // __IMMINTRIN_H #ifndef __AMX_COMPLEXINTRIN_H #define __AMX_COMPLEXINTRIN_H #ifdef __x86_64__ #define __DEFAULT_FN_ATTRS_COMPLEX \ __attribute__((__always_inline__, __nodebug__, __target__("amx-complex"))) /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles \a a and \a b is interpreted as a complex number /// with FP16 real part and FP16 imaginary part. /// Calculates the imaginary part of the result. For each possible combination /// of (row of \a a, column of \a b), it performs a set of multiplication /// and accumulations on all corresponding complex numbers (one from \a a /// and one from \a b). The imaginary part of the \a a element is multiplied /// with the real part of the corresponding \a b element, and the real part /// of the \a a element is multiplied with the imaginary part of the /// corresponding \a b elements. The two accumulated results are added, and /// then accumulated into the corresponding row and column of \a dst. /// /// \headerfile /// /// \code /// void _tile_cmmimfp16ps(__tile dst, __tile a, __tile b); /// \endcode /// /// \code{.operation} /// FOR m := 0 TO dst.rows - 1 /// tmp := dst.row[m] /// FOR k := 0 TO (a.colsb / 4) - 1 /// FOR n := 0 TO (dst.colsb / 4) - 1 /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) /// ENDFOR /// ENDFOR /// write_row_and_zero(dst, m, tmp, dst.colsb) /// ENDFOR /// zero_upper_rows(dst, dst.rows) /// zero_tileconfig_start() /// \endcode /// /// This intrinsic corresponds to the \c TCMMIMFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param a /// The 1st source tile. Max size is 1024 Bytes. /// \param b /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_cmmimfp16ps(dst, a, b) __builtin_ia32_tcmmimfp16ps(dst, a, b) /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles \a a and \a b is interpreted as a complex number /// with FP16 real part and FP16 imaginary part. /// Calculates the real part of the result. For each possible combination /// of (row of \a a, column of \a b), it performs a set of multiplication /// and accumulations on all corresponding complex numbers (one from \a a /// and one from \a b). The real part of the \a a element is multiplied /// with the real part of the corresponding \a b element, and the negated /// imaginary part of the \a a element is multiplied with the imaginary /// part of the corresponding \a b elements. The two accumulated results /// are added, and then accumulated into the corresponding row and column /// of \a dst. /// /// \headerfile /// /// \code /// void _tile_cmmrlfp16ps(__tile dst, __tile a, __tile b); /// \endcode /// /// \code{.operation} /// FOR m := 0 TO dst.rows - 1 /// tmp := dst.row[m] /// FOR k := 0 TO (a.colsb / 4) - 1 /// FOR n := 0 TO (dst.colsb / 4) - 1 /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0]) /// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1]) /// ENDFOR /// ENDFOR /// write_row_and_zero(dst, m, tmp, dst.colsb) /// ENDFOR /// zero_upper_rows(dst, dst.rows) /// zero_tileconfig_start() /// \endcode /// /// This intrinsic corresponds to the \c TCMMIMFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param a /// The 1st source tile. Max size is 1024 Bytes. /// \param b /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_cmmrlfp16ps(dst, a, b) __builtin_ia32_tcmmrlfp16ps(dst, a, b) static __inline__ _tile1024i __DEFAULT_FN_ATTRS_COMPLEX _tile_cmmimfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tcmmimfp16ps_internal(m, n, k, dst, src1, src2); } static __inline__ _tile1024i __DEFAULT_FN_ATTRS_COMPLEX _tile_cmmrlfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tcmmrlfp16ps_internal(m, n, k, dst, src1, src2); } /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles src0 and src1 is interpreted as a complex number with /// FP16 real part and FP16 imaginary part. /// This function calculates the imaginary part of the result. /// /// \headerfile /// /// This intrinsic corresponds to the TCMMIMFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_COMPLEX static void __tile_cmmimfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_cmmimfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Perform matrix multiplication of two tiles containing complex elements and /// accumulate the results into a packed single precision tile. Each dword /// element in input tiles src0 and src1 is interpreted as a complex number with /// FP16 real part and FP16 imaginary part. /// This function calculates the real part of the result. /// /// \headerfile /// /// This intrinsic corresponds to the TCMMRLFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. 
Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_COMPLEX static void __tile_cmmrlfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_cmmrlfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } #endif // __x86_64__ #endif // __AMX_COMPLEXINTRIN_H /*===------------- amxfp16intrin.h - AMX_FP16 intrinsics -*- C++ -*---------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===------------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; use instead." #endif /* __IMMINTRIN_H */ #ifndef __AMX_FP16INTRIN_H #define __AMX_FP16INTRIN_H #ifdef __x86_64__ /// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles \a a /// and \a b, accumulating the intermediate single-precision (32-bit) /// floating-point elements with elements in \a dst, and store the 32-bit /// result back to tile \a dst. /// /// \headerfile /// /// \code /// void _tile_dpfp16ps (__tile dst, __tile a, __tile b) /// \endcode /// /// \code{.operation} /// FOR m := 0 TO dst.rows - 1 /// tmp := dst.row[m] /// FOR k := 0 TO (a.colsb / 4) - 1 /// FOR n := 0 TO (dst.colsb / 4) - 1 /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * /// FP32(b.row[k].fp16[2*n+0]) /// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * /// FP32(b.row[k].fp16[2*n+1]) /// ENDFOR /// ENDFOR /// write_row_and_zero(dst, m, tmp, dst.colsb) /// ENDFOR /// zero_upper_rows(dst, dst.rows) /// zero_tileconfig_start() /// \endcode /// /// This intrinsic corresponds to the \c TDPFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param a /// The 1st source tile. Max size is 1024 Bytes. /// \param b /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_dpfp16ps(dst, a, b) \ __builtin_ia32_tdpfp16ps(dst, a, b) #endif /* __x86_64__ */ #endif /* __AMX_FP16INTRIN_H */ /*===--------------- amxintrin.h - AMX intrinsics -*- C/C++ -*---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===------------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif /* __IMMINTRIN_H */ #ifndef __AMXINTRIN_H #define __AMXINTRIN_H #ifdef __x86_64__ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS_TILE \ __attribute__((__always_inline__, __nodebug__, __target__("amx-tile"))) #define __DEFAULT_FN_ATTRS_INT8 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-int8"))) #define __DEFAULT_FN_ATTRS_BF16 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-bf16"))) #define __DEFAULT_FN_ATTRS_FP16 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-fp16"))) /// Load tile configuration from a 64-byte memory location specified by /// "mem_addr". The tile configuration includes the tile type palette, the /// number of bytes per row, and the number of rows. If the specified /// palette_id is zero, that signifies the init state for both the tile /// config and the tile data, and the tiles are zeroed. Any invalid /// configurations will result in #GP fault. /// /// \headerfile /// /// This intrinsic corresponds to the LDTILECFG instruction. /// /// \param __config /// A pointer to 512-bits configuration static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_loadconfig(const void *__config) { __builtin_ia32_tile_loadconfig(__config); } /// Stores the current tile configuration to a 64-byte memory location /// specified by "mem_addr". 
The tile configuration includes the tile type /// palette, the number of bytes per row, and the number of rows. If tiles /// are not configured, all zeroes will be stored to memory. /// /// \headerfile /// /// This intrinsic corresponds to the STTILECFG instruction. /// /// \param __config /// A pointer to 512-bits configuration static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_storeconfig(void *__config) { __builtin_ia32_tile_storeconfig(__config); } /// Release the tile configuration to return to the init state, which /// releases all storage it currently holds. /// /// \headerfile /// /// This intrinsic corresponds to the TILERELEASE instruction. static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) { __builtin_ia32_tilerelease(); } /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst" using the tile configuration previously configured /// via "_tile_loadconfig". /// /// \headerfile /// /// This intrinsic corresponds to the TILELOADD instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. #define _tile_loadd(dst, base, stride) \ __builtin_ia32_tileloadd64((dst), ((const void *)(base)), \ (__SIZE_TYPE__)(stride)) /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst" using the tile configuration previously configured /// via "_tile_loadconfig". This intrinsic provides a hint to the implementation /// that the data will likely not be reused in the near future and the data /// caching can be optimized accordingly. /// /// \headerfile /// /// This intrinsic corresponds to the TILELOADDT1 instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. 
#define _tile_stream_loadd(dst, base, stride) \ __builtin_ia32_tileloaddt164((dst), ((const void *)(base)), \ (__SIZE_TYPE__)(stride)) /// Store the tile specified by "src" to memory specifieid by "base" address and /// "stride" using the tile configuration previously configured via /// "_tile_loadconfig". /// /// \headerfile /// /// This intrinsic corresponds to the TILESTORED instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be stored in memory. #define _tile_stored(dst, base, stride) \ __builtin_ia32_tilestored64((dst), ((void *)(base)), (__SIZE_TYPE__)(stride)) /// Zero the tile specified by "tdest". /// /// \headerfile /// /// This intrinsic corresponds to the TILEZERO instruction. /// /// \param tile /// The destination tile to be zero. Max size is 1024 Bytes. #define _tile_zero(tile) __builtin_ia32_tilezero((tile)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBSSD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbssd(dst, src0, src1) \ __builtin_ia32_tdpbssd((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. 
Sum these 4 results with the corresponding 32-bit integer /// in "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBSUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbsud(dst, src0, src1) \ __builtin_ia32_tdpbsud((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUSD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbusd(dst, src0, src1) \ __builtin_ia32_tdpbusd((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. Sum these 4 results with the corresponding 32-bit integer in /// "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. 
#define _tile_dpbuud(dst, src0, src1) \ __builtin_ia32_tdpbuud((dst), (src0), (src1)) /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and /// src1, accumulating the intermediate single-precision (32-bit) floating-point /// elements with elements in "dst", and store the 32-bit result back to tile /// "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBF16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. #define _tile_dpbf16ps(dst, src0, src1) \ __builtin_ia32_tdpbf16ps((dst), (src0), (src1)) /// AMX tile register size can be configured, the maximum size is 16x64=1024 /// bytes. Since there is no 2D type in llvm IR, we use vector type to /// represent 2D tile and the fixed size is maximum amx tile register size. typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_loadd_internal(unsigned short m, unsigned short n, const void *base, __SIZE_TYPE__ stride) { return __builtin_ia32_tileloadd64_internal(m, n, base, (__SIZE_TYPE__)(stride)); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base, __SIZE_TYPE__ stride) { return __builtin_ia32_tileloaddt164_internal(m, n, base, (__SIZE_TYPE__)(stride)); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbssd_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbssd_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. 
C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbsud_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbsud_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbusd_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbusd_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 _tile_dpbuud_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbuud_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ void __DEFAULT_FN_ATTRS_INT8 _tile_stored_internal(unsigned short m, unsigned short n, void *base, __SIZE_TYPE__ stride, _tile1024i tile) { return __builtin_ia32_tilestored64_internal(m, n, base, (__SIZE_TYPE__)(stride), tile); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_BF16 _tile_dpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpbf16ps_internal(m, n, k, dst, src1, src2); } /// This is internal intrinsic. C/C++ user should avoid calling it directly. static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP16 _tile_dpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2) { return __builtin_ia32_tdpfp16ps_internal(m, n, k, dst, src1, src2); } /// This struct pack the shape and tile data together for user. 
We suggest /// initializing the struct as early as possible, because compiler depends /// on the shape information to do configure. The constant value is preferred /// for optimization by compiler. typedef struct __tile1024i_str { const unsigned short row; const unsigned short col; _tile1024i tile; } __tile1024i; /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TILELOADD instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. __DEFAULT_FN_ATTRS_TILE static __inline__ void __tile_loadd(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) { dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride); } /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst". This intrinsic provides a hint to the implementation /// that the data will likely not be reused in the near future and the data /// caching can be optimized accordingly. /// /// \headerfile /// /// This intrinsic corresponds to the TILELOADDT1 instruction. /// /// \param dst /// A destination tile. Max size is 1024 Bytes. /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. __DEFAULT_FN_ATTRS_TILE static __inline__ void __tile_stream_loadd(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) { dst->tile = _tile_loaddt1_internal(dst->row, dst->col, base, stride); } /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. 
Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBSSD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. Sum these 4 results with the corresponding 32-bit integer /// in "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBSUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding signed 8-bit integers in src1, producing 4 intermediate 32-bit /// results. Sum these 4 results with the corresponding 32-bit integer in "dst", /// and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUSD instruction. /// /// \param dst /// The destination tile. 
Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with /// corresponding unsigned 8-bit integers in src1, producing 4 intermediate /// 32-bit results. Sum these 4 results with the corresponding 32-bit integer in /// "dst", and store the 32-bit result back to tile "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBUUD instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Store the tile specified by "src" to memory specifieid by "base" address and /// "stride". /// /// \headerfile /// /// This intrinsic corresponds to the TILESTORED instruction. /// /// \param base /// A pointer to base address. /// \param stride /// The stride between the rows' data to be stored in memory. __DEFAULT_FN_ATTRS_TILE static __inline__ void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { _tile_stored_internal(src.row, src.col, base, stride, src.tile); } /// Zero the tile specified by "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TILEZERO instruction. /// /// \param dst /// The destination tile to be zero. Max size is 1024 Bytes. 
__DEFAULT_FN_ATTRS_TILE static __inline__ void __tile_zero(__tile1024i *dst) { dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col); } /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and /// src1, accumulating the intermediate single-precision (32-bit) floating-point /// elements with elements in "dst", and store the 32-bit result back to tile /// "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPBF16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_BF16 static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } /// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles src0 and /// src1, accumulating the intermediate single-precision (32-bit) floating-point /// elements with elements in "dst", and store the 32-bit result back to tile /// "dst". /// /// \headerfile /// /// This intrinsic corresponds to the TDPFP16PS instruction. /// /// \param dst /// The destination tile. Max size is 1024 Bytes. /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_FP16 static __inline__ void __tile_dpfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_dpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } #undef __DEFAULT_FN_ATTRS_TILE #undef __DEFAULT_FN_ATTRS_INT8 #undef __DEFAULT_FN_ATTRS_BF16 #undef __DEFAULT_FN_ATTRS_FP16 #endif /* __x86_64__ */ #endif /* __AMXINTRIN_H */ /*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we're compiling for the windows platform. */ #ifndef _MSC_VER #include_next #else #ifndef __ARM64INTR_H #define __ARM64INTR_H typedef enum { _ARM64_BARRIER_SY = 0xF, _ARM64_BARRIER_ST = 0xE, _ARM64_BARRIER_LD = 0xD, _ARM64_BARRIER_ISH = 0xB, _ARM64_BARRIER_ISHST = 0xA, _ARM64_BARRIER_ISHLD = 0x9, _ARM64_BARRIER_NSH = 0x7, _ARM64_BARRIER_NSHST = 0x6, _ARM64_BARRIER_NSHLD = 0x5, _ARM64_BARRIER_OSH = 0x3, _ARM64_BARRIER_OSHST = 0x2, _ARM64_BARRIER_OSHLD = 0x1 } _ARM64INTR_BARRIER_TYPE; #endif /* __ARM64INTR_H */ #endif /* _MSC_VER */ /*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_ACLE_H #define __ARM_ACLE_H #ifndef __ARM_ACLE #error "ACLE intrinsics support not enabled." 
#endif #include #if defined(__cplusplus) extern "C" { #endif /* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */ /* 8.3 Memory barriers */ #if !__has_builtin(__dmb) #define __dmb(i) __builtin_arm_dmb(i) #endif #if !__has_builtin(__dsb) #define __dsb(i) __builtin_arm_dsb(i) #endif #if !__has_builtin(__isb) #define __isb(i) __builtin_arm_isb(i) #endif /* 8.4 Hints */ #if !__has_builtin(__wfi) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) { __builtin_arm_wfi(); } #endif #if !__has_builtin(__wfe) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) { __builtin_arm_wfe(); } #endif #if !__has_builtin(__sev) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) { __builtin_arm_sev(); } #endif #if !__has_builtin(__sevl) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) { __builtin_arm_sevl(); } #endif #if !__has_builtin(__yield) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) { __builtin_arm_yield(); } #endif #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __dbg(t) __builtin_arm_dbg(t) #endif /* 8.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { uint32_t v; do v = __builtin_arm_ldrex(__p); while (__builtin_arm_strex(__x, __p)); return v; } /* 8.6 Memory prefetch intrinsics */ /* 8.6.1 Data prefetch */ #define __pld(addr) __pldx(0, 0, 0, addr) #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, 1) #else #define __pldx(access_kind, cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1) #endif /* 8.6.2 Instruction prefetch */ #define __pli(addr) __plix(0, 0, addr) #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __plix(cache_level, 
retention_policy, addr) \ __builtin_arm_prefetch(addr, 0, 0) #else #define __plix(cache_level, retention_policy, addr) \ __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0) #endif /* 8.7 NOP */ #if !defined(_MSC_VER) || !defined(__aarch64__) static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) { __builtin_arm_nop(); } #endif /* 9 DATA-PROCESSING INTRINSICS */ /* 9.2 Miscellaneous data-processing intrinsics */ /* ROR */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __ror(uint32_t __x, uint32_t __y) { __y %= 32; if (__y == 0) return __x; return (__x >> __y) | (__x << (32 - __y)); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rorll(uint64_t __x, uint32_t __y) { __y %= 64; if (__y == 0) return __x; return (__x >> __y) | (__x << (64 - __y)); } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rorl(unsigned long __x, uint32_t __y) { #if __SIZEOF_LONG__ == 4 return __ror(__x, __y); #else return __rorll(__x, __y); #endif } /* CLZ */ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clz(uint32_t __t) { return __builtin_arm_clz(__t); } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clzl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __builtin_arm_clz(__t); #else return __builtin_arm_clz64(__t); #endif } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clzll(uint64_t __t) { return __builtin_arm_clz64(__t); } /* CLS */ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __cls(uint32_t __t) { return __builtin_arm_cls(__t); } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) __clsl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __builtin_arm_cls(__t); #else return __builtin_arm_cls64(__t); #endif } static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 
__clsll(uint64_t __t) { return __builtin_arm_cls64(__t); } /* REV */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev(uint32_t __t) { return __builtin_bswap32(__t); } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __revl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __builtin_bswap32(__t); #else return __builtin_bswap64(__t); #endif } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __revll(uint64_t __t) { return __builtin_bswap64(__t); } /* REV16 */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev16(uint32_t __t) { return __ror(__rev(__t), 16); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rev16ll(uint64_t __t) { return (((uint64_t)__rev16(__t >> 32)) << 32) | (uint64_t)__rev16((uint32_t)__t); } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rev16l(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __rev16(__t); #else return __rev16ll(__t); #endif } /* REVSH */ static __inline__ int16_t __attribute__((__always_inline__, __nodebug__)) __revsh(int16_t __t) { return (int16_t)__builtin_bswap16((uint16_t)__t); } /* RBIT */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rbit(uint32_t __t) { return __builtin_arm_rbit(__t); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rbitll(uint64_t __t) { #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE return (((uint64_t)__builtin_arm_rbit(__t)) << 32) | __builtin_arm_rbit(__t >> 32); #else return __builtin_arm_rbit64(__t); #endif } static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rbitl(unsigned long __t) { #if __SIZEOF_LONG__ == 4 return __rbit(__t); #else return __rbitll(__t); #endif } /* * 9.3 16-bit multiplications */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t 
__attribute__((__always_inline__,__nodebug__)) __smulbb(int32_t __a, int32_t __b) { return __builtin_arm_smulbb(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulbt(int32_t __a, int32_t __b) { return __builtin_arm_smulbt(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smultb(int32_t __a, int32_t __b) { return __builtin_arm_smultb(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smultt(int32_t __a, int32_t __b) { return __builtin_arm_smultt(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulwb(int32_t __a, int32_t __b) { return __builtin_arm_smulwb(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) __smulwt(int32_t __a, int32_t __b) { return __builtin_arm_smulwt(__a, __b); } #endif /* * 9.4 Saturating intrinsics * * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag * intrinsics are implemented and the flag is enabled. 
*/ /* 9.4.1 Width-specified saturation intrinsics */ #if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT #define __ssat(x, y) __builtin_arm_ssat(x, y) #define __usat(x, y) __builtin_arm_usat(x, y) #endif /* 9.4.2 Saturating addition and subtraction intrinsics */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qadd(int32_t __t, int32_t __v) { return __builtin_arm_qadd(__t, __v); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qsub(int32_t __t, int32_t __v) { return __builtin_arm_qsub(__t, __v); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qdbl(int32_t __t) { return __builtin_arm_qadd(__t, __t); } #endif /* 9.4.3 Accumultating multiplications */ #if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlabb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabb(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlabt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabt(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlatb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatb(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlatt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatt(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlawb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawb(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlawt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawt(__a, __b, __c); } #endif /* 9.5.4 Parallel 16-bit saturation */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 #define __ssat16(x, y) 
__builtin_arm_ssat16(x, y) #define __usat16(x, y) __builtin_arm_usat16(x, y) #endif /* 9.5.5 Packing and unpacking */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 typedef int32_t int8x4_t; typedef int32_t int16x2_t; typedef uint32_t uint8x4_t; typedef uint32_t uint16x2_t; static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sxtab16(int16x2_t __a, int8x4_t __b) { return __builtin_arm_sxtab16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sxtb16(int8x4_t __a) { return __builtin_arm_sxtb16(__a); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __uxtab16(int16x2_t __a, int8x4_t __b) { return __builtin_arm_uxtab16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __uxtb16(int8x4_t __a) { return __builtin_arm_uxtb16(__a); } #endif /* 9.5.6 Parallel selection */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __sel(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_sel(__a, __b); } #endif /* 9.5.7 Parallel 8-bit addition and subtraction */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __qadd8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_qadd8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __qsub8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_qsub8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __sadd8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_sadd8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __shadd8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_shadd8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __shsub8(int8x4_t __a, int8x4_t __b) { return 
__builtin_arm_shsub8(__a, __b); } static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) __ssub8(int8x4_t __a, int8x4_t __b) { return __builtin_arm_ssub8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uadd8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uadd8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uhadd8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uhadd8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uhsub8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uhsub8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uqadd8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uqadd8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __uqsub8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_uqsub8(__a, __b); } static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) __usub8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_usub8(__a, __b); } #endif /* 9.5.8 Sum of 8-bit absolute differences */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __usad8(uint8x4_t __a, uint8x4_t __b) { return __builtin_arm_usad8(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) { return __builtin_arm_usada8(__a, __b, __c); } #endif /* 9.5.9 Parallel 16-bit addition and subtraction */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qadd16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qadd16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qasx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qasx(__a, 
__b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qsax(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qsax(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __qsub16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_qsub16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sadd16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_sadd16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __sasx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_sasx(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shadd16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shadd16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shasx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shasx(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shsax(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shsax(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __shsub16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_shsub16(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __ssax(int16x2_t __a, int16x2_t __b) { return __builtin_arm_ssax(__a, __b); } static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) __ssub16(int16x2_t __a, int16x2_t __b) { return __builtin_arm_ssub16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uadd16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uadd16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uasx(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uasx(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhadd16(uint16x2_t __a, 
uint16x2_t __b) { return __builtin_arm_uhadd16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhasx(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhasx(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhsax(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhsax(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uhsub16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uhsub16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqadd16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqadd16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqasx(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqasx(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqsax(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqsax(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __uqsub16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_uqsub16(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __usax(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_usax(__a, __b); } static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) __usub16(uint16x2_t __a, uint16x2_t __b) { return __builtin_arm_usub16(__a, __b); } #endif /* 9.5.10 Parallel 16-bit multiplications */ #if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlad(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smlad(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smladx(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smladx(__a, __b, __c); } static __inline__ int64_t 
__attribute__((__always_inline__, __nodebug__)) __smlald(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlald(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlaldx(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smlsd(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) { return __builtin_arm_smlsdx(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlsld(__a, __b, __c); } static __inline__ int64_t __attribute__((__always_inline__, __nodebug__)) __smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) { return __builtin_arm_smlsldx(__a, __b, __c); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smuad(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smuad(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smuadx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smuadx(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smusd(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smusd(__a, __b); } static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __smusdx(int16x2_t __a, int16x2_t __b) { return __builtin_arm_smusdx(__a, __b); } #endif /* 9.7 CRC32 intrinsics */ #if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \ (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32b(uint32_t __a, uint8_t __b) { return __builtin_arm_crc32b(__a, __b); } static __inline__ uint32_t 
__attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32h(uint32_t __a, uint16_t __b) { return __builtin_arm_crc32h(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32w(uint32_t __a, uint32_t __b) { return __builtin_arm_crc32w(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32d(uint32_t __a, uint64_t __b) { return __builtin_arm_crc32d(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32cb(uint32_t __a, uint8_t __b) { return __builtin_arm_crc32cb(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32ch(uint32_t __a, uint16_t __b) { return __builtin_arm_crc32ch(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32cw(uint32_t __a, uint32_t __b) { return __builtin_arm_crc32cw(__a, __b); } static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32cd(uint32_t __a, uint64_t __b) { return __builtin_arm_crc32cd(__a, __b); } #endif /* Armv8.3-A Javascript conversion intrinsic */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a"))) __jcvt(double __a) { return __builtin_arm_jcvt(__a); } #endif /* Armv8.5-A FP rounding intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32zf(float __a) { return __builtin_arm_rint32zf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32z(double __a) { return __builtin_arm_rint32z(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64zf(float __a) { return __builtin_arm_rint64zf(__a); } static __inline__ double 
__attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64z(double __a) { return __builtin_arm_rint64z(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32xf(float __a) { return __builtin_arm_rint32xf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint32x(double __a) { return __builtin_arm_rint32x(__a); } static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64xf(float __a) { return __builtin_arm_rint64xf(__a); } static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a"))) __rint64x(double __a) { return __builtin_arm_rint64x(__a); } #endif /* Armv8.7-A load/store 64-byte intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE typedef struct { uint64_t val[8]; } data512_t; static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_ld64b(const void *__addr) { data512_t __value; __builtin_arm_ld64b(__addr, __value.val); return __value; } static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_st64b(void *__addr, data512_t __value) { __builtin_arm_st64b(__addr, __value.val); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_st64bv(void *__addr, data512_t __value) { return __builtin_arm_st64bv(__addr, __value.val); } static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64"))) __arm_st64bv0(void *__addr, data512_t __value) { return __builtin_arm_st64bv0(__addr, __value.val); } #endif /* 10.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) #define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg) #define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg) #define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg)) #define 
__arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg)) #define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v) #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) #define __arm_wsr128(sysreg, v) __builtin_arm_wsr128(sysreg, v) #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) #define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v)) #define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v)) /* Memory Tagging Extensions (MTE) Intrinsics */ #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE #define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) #define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset) #define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded) #define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr) #define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr) #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) /* Memory Operations Intrinsics */ #define __arm_mops_memset_tag(__tagged_address, __value, __size) \ __builtin_arm_mops_memset_tag(__tagged_address, __value, __size) #endif /* Transactional Memory Extension (TME) Intrinsics */ #if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME #define _TMFAILURE_REASON 0x00007fffu #define _TMFAILURE_RTRY 0x00008000u #define _TMFAILURE_CNCL 0x00010000u #define _TMFAILURE_MEM 0x00020000u #define _TMFAILURE_IMP 0x00040000u #define _TMFAILURE_ERR 0x00080000u #define _TMFAILURE_SIZE 0x00100000u #define _TMFAILURE_NEST 0x00200000u #define _TMFAILURE_DBG 0x00400000u #define _TMFAILURE_INT 0x00800000u #define _TMFAILURE_TRIVIAL 0x01000000u #define __tstart() __builtin_arm_tstart() #define __tcommit() __builtin_arm_tcommit() #define __tcancel(__arg) __builtin_arm_tcancel(__arg) #define __ttest() __builtin_arm_ttest() #endif /* __ARM_FEATURE_TME */ /* Armv8.5-A Random number generation intrinsics */ #if 
defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand"))) __rndr(uint64_t *__p) { return __builtin_arm_rndr(__p); } static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand"))) __rndrrs(uint64_t *__p) { return __builtin_arm_rndrrs(__p); } #endif #if defined(__cplusplus) } #endif #endif /* __ARM_ACLE_H */ /*===---- arm_bf16.h - ARM BF16 intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_BF16_H #define __ARM_BF16_H typedef __bf16 bfloat16_t; #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #undef __ai #endif /*===---- arm_cde.h - ARM CDE intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_CDE_H #define __ARM_CDE_H #if !__ARM_FEATURE_CDE #error "CDE support not enabled" #endif #include #ifdef __cplusplus extern "C" { #endif static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1))) uint32_t __arm_cx1(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1a))) uint32_t __arm_cx1a(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1d))) uint64_t __arm_cx1d(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx1da))) uint64_t __arm_cx1da(int, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2))) uint32_t __arm_cx2(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2a))) uint32_t __arm_cx2a(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2d))) uint64_t __arm_cx2d(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx2da))) uint64_t __arm_cx2da(int, uint64_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3))) uint32_t __arm_cx3(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3a))) uint32_t __arm_cx3a(int, uint32_t, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3d))) uint64_t __arm_cx3d(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_cx3da))) uint64_t __arm_cx3da(int, uint64_t, uint32_t, uint32_t, uint32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1_u32))) uint32_t __arm_vcx1_u32(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1a_u32))) uint32_t __arm_vcx1a_u32(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1d_u64))) uint64_t __arm_vcx1d_u64(int, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1da_u64))) uint64_t __arm_vcx1da_u64(int, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2_u32))) uint32_t __arm_vcx2_u32(int, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2a_u32))) uint32_t __arm_vcx2a_u32(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2d_u64))) uint64_t __arm_vcx2d_u64(int, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx2da_u64))) uint64_t __arm_vcx2da_u64(int, uint64_t, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3_u32))) uint32_t __arm_vcx3_u32(int, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3a_u32))) uint32_t __arm_vcx3a_u32(int, uint32_t, uint32_t, uint32_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3d_u64))) uint64_t __arm_vcx3d_u64(int, uint64_t, uint64_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx3da_u64))) uint64_t __arm_vcx3da_u64(int, uint64_t, uint64_t, uint64_t, uint32_t); #if __ARM_FEATURE_MVE typedef uint16_t mve_pred16_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) int16_t int16x8_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) int32_t int32x4_t; 
typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) int64_t int64x2_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) int8_t int8x16_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) uint16_t uint16x8_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) uint32_t uint32x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) uint64_t uint64x2_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) uint8_t uint8x16_t; static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s16))) int16x8_t __arm_vcx1q_m(int, int16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s32))) int32x4_t __arm_vcx1q_m(int, int32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s64))) int64x2_t __arm_vcx1q_m(int, int64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_s8))) int8x16_t __arm_vcx1q_m(int, int8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u16))) uint16x8_t __arm_vcx1q_m(int, uint16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u32))) uint32x4_t __arm_vcx1q_m(int, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u64))) uint64x2_t __arm_vcx1q_m(int, uint64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_u8))) uint8x16_t 
__arm_vcx1q_m(int, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_u8))) uint8x16_t __arm_vcx1q_u8(int, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s16))) int16x8_t __arm_vcx1qa_m(int, int16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s32))) int32x4_t __arm_vcx1qa_m(int, int32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s64))) int64x2_t __arm_vcx1qa_m(int, int64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_s8))) int8x16_t __arm_vcx1qa_m(int, int8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u16))) uint16x8_t __arm_vcx1qa_m(int, uint16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u32))) uint32x4_t __arm_vcx1qa_m(int, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u64))) uint64x2_t __arm_vcx1qa_m(int, uint64x2_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_u8))) uint8x16_t __arm_vcx1qa_m(int, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s16))) int16x8_t __arm_vcx1qa(int, int16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s32))) int32x4_t __arm_vcx1qa(int, int32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s64))) int64x2_t __arm_vcx1qa(int, int64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_s8))) int8x16_t __arm_vcx1qa(int, int8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u16))) uint16x8_t __arm_vcx1qa(int, uint16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u32))) uint32x4_t __arm_vcx1qa(int, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u64))) uint64x2_t __arm_vcx1qa(int, uint64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_u8))) uint8x16_t __arm_vcx1qa(int, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s16))) int16x8_t __arm_vcx2q_m_impl(int, int16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s32))) int32x4_t __arm_vcx2q_m_impl(int, int32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s64))) int64x2_t __arm_vcx2q_m_impl(int, int64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_s8))) int8x16_t __arm_vcx2q_m_impl(int, int8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u16))) uint16x8_t __arm_vcx2q_m_impl(int, uint16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u32))) uint32x4_t __arm_vcx2q_m_impl(int, uint32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u64))) uint64x2_t __arm_vcx2q_m_impl(int, uint64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_u8))) uint8x16_t __arm_vcx2q_m_impl(int, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s16))) int16x8_t __arm_vcx2q(int, int16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s32))) int32x4_t __arm_vcx2q(int, int32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s64))) int64x2_t __arm_vcx2q(int, int64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_s8))) int8x16_t __arm_vcx2q(int, int8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u16))) uint16x8_t __arm_vcx2q(int, uint16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u32))) uint32x4_t __arm_vcx2q(int, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u64))) uint64x2_t __arm_vcx2q(int, uint64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8))) uint8x16_t __arm_vcx2q(int, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s16))) uint8x16_t __arm_vcx2q_u8(int, int16x8_t, uint32_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s32))) uint8x16_t __arm_vcx2q_u8(int, int32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s64))) uint8x16_t __arm_vcx2q_u8(int, int64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_s8))) uint8x16_t __arm_vcx2q_u8(int, int8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u16))) uint8x16_t __arm_vcx2q_u8(int, uint16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u32))) uint8x16_t __arm_vcx2q_u8(int, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u64))) uint8x16_t __arm_vcx2q_u8(int, uint64x2_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_u8))) uint8x16_t __arm_vcx2q_u8(int, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s16))) int16x8_t __arm_vcx2qa_impl(int, int16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s32))) int32x4_t __arm_vcx2qa_impl(int, int32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s64))) int64x2_t __arm_vcx2qa_impl(int, int64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_s8))) int8x16_t __arm_vcx2qa_impl(int, int8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u16))) uint16x8_t 
__arm_vcx2qa_impl(int, uint16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u32))) uint32x4_t __arm_vcx2qa_impl(int, uint32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u64))) uint64x2_t __arm_vcx2qa_impl(int, uint64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_u8))) uint8x16_t __arm_vcx2qa_impl(int, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s16))) int16x8_t __arm_vcx2qa_m_impl(int, int16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s32))) int32x4_t __arm_vcx2qa_m_impl(int, int32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s64))) int64x2_t __arm_vcx2qa_m_impl(int, int64x2_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_s8))) int8x16_t __arm_vcx2qa_m_impl(int, int8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u16))) uint16x8_t __arm_vcx2qa_m_impl(int, uint16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u32))) uint32x4_t __arm_vcx2qa_m_impl(int, uint32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u64))) uint64x2_t __arm_vcx2qa_m_impl(int, uint64x2_t, uint8x16_t, uint32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_u8))) uint8x16_t __arm_vcx2qa_m_impl(int, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s16))) int16x8_t __arm_vcx3q_impl(int, int16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s32))) int32x4_t __arm_vcx3q_impl(int, int32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s64))) int64x2_t __arm_vcx3q_impl(int, int64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_s8))) int8x16_t __arm_vcx3q_impl(int, int8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u16))) uint16x8_t __arm_vcx3q_impl(int, uint16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u32))) uint32x4_t __arm_vcx3q_impl(int, uint32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u64))) uint64x2_t __arm_vcx3q_impl(int, uint64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_u8))) uint8x16_t __arm_vcx3q_impl(int, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s16))) int16x8_t __arm_vcx3q_m_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s32))) int32x4_t 
__arm_vcx3q_m_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s64))) int64x2_t __arm_vcx3q_m_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_s8))) int8x16_t __arm_vcx3q_m_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u16))) uint16x8_t __arm_vcx3q_m_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u32))) uint32x4_t __arm_vcx3q_m_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u64))) uint64x2_t __arm_vcx3q_m_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_u8))) uint8x16_t __arm_vcx3q_m_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s16))) uint8x16_t __arm_vcx3q_u8_impl(int, int16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s32))) uint8x16_t __arm_vcx3q_u8_impl(int, int32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s64))) uint8x16_t __arm_vcx3q_u8_impl(int, int64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_s8))) uint8x16_t __arm_vcx3q_u8_impl(int, int8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u16))) uint8x16_t __arm_vcx3q_u8_impl(int, uint16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u32))) uint8x16_t __arm_vcx3q_u8_impl(int, uint32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u64))) uint8x16_t __arm_vcx3q_u8_impl(int, uint64x2_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_u8))) uint8x16_t __arm_vcx3q_u8_impl(int, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s16))) int16x8_t __arm_vcx3qa_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s32))) int32x4_t __arm_vcx3qa_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s64))) int64x2_t __arm_vcx3qa_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_s8))) int8x16_t __arm_vcx3qa_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u16))) uint16x8_t __arm_vcx3qa_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u32))) uint32x4_t __arm_vcx3qa_impl(int, 
uint32x4_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u64))) uint64x2_t __arm_vcx3qa_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_u8))) uint8x16_t __arm_vcx3qa_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s16))) int16x8_t __arm_vcx3qa_m_impl(int, int16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s32))) int32x4_t __arm_vcx3qa_m_impl(int, int32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s64))) int64x2_t __arm_vcx3qa_m_impl(int, int64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_s8))) int8x16_t __arm_vcx3qa_m_impl(int, int8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u16))) uint16x8_t __arm_vcx3qa_m_impl(int, uint16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u32))) uint32x4_t __arm_vcx3qa_m_impl(int, uint32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u64))) uint64x2_t __arm_vcx3qa_m_impl(int, uint64x2_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_u8))) uint8x16_t __arm_vcx3qa_m_impl(int, uint8x16_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t __arm_vreinterpretq_s16_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t __arm_vreinterpretq_s32_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t __arm_vreinterpretq_s64_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t __arm_vreinterpretq_s8_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t __arm_vreinterpretq_u16_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t __arm_vreinterpretq_u32_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t __arm_vreinterpretq_u64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t __arm_vreinterpretq_u8(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t __arm_vreinterpretq_u8(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t __arm_vreinterpretq_u8(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t __arm_vreinterpretq_u8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) 
uint8x16_t __arm_vreinterpretq_u8(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t __arm_vreinterpretq_u8(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t __arm_vreinterpretq_u8(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vreinterpretq_u8_u8))) uint8x16_t __arm_vreinterpretq_u8(uint8x16_t); #define __arm_vcx2q_m(cp, inactive, n, imm, pred) __arm_vcx2q_m_impl((cp), (inactive), __arm_vreinterpretq_u8(n), (imm), (pred)) #define __arm_vcx2qa(cp, acc, n, imm) __arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm)) #define __arm_vcx2qa_m(cp, acc, n, imm, pred) __arm_vcx2qa_m_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm), (pred)) #define __arm_vcx3q(cp, n, m, imm) __arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm)) #define __arm_vcx3q_m(cp, inactive, n, m, imm, pred) __arm_vcx3q_m_impl((cp), (inactive), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm), (pred)) #define __arm_vcx3q_u8(cp, n, m, imm) __arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm)) #define __arm_vcx3qa(cp, acc, n, m, imm) __arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm)) #define __arm_vcx3qa_m(cp, acc, n, m, imm, pred) __arm_vcx3qa_m_impl((cp), (acc), __arm_vreinterpretq_u8(n), __arm_vreinterpretq_u8(m), (imm), (pred)) #endif /* __ARM_FEATURE_MVE */ #if __ARM_FEATURE_MVE & 2 typedef __fp16 float16_t; typedef float float32_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) float16_t float16x8_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) float32_t float32x4_t; static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_f16))) float16x8_t 
__arm_vcx1q_m(int, float16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1q_m_f32))) float32x4_t __arm_vcx1q_m(int, float32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_f16))) float16x8_t __arm_vcx1qa(int, float16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_f32))) float32x4_t __arm_vcx1qa(int, float32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_f16))) float16x8_t __arm_vcx1qa_m(int, float16x8_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx1qa_m_f32))) float32x4_t __arm_vcx1qa_m(int, float32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_f16))) float16x8_t __arm_vcx2q(int, float16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_f32))) float32x4_t __arm_vcx2q(int, float32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_f16))) float16x8_t __arm_vcx2q_m_impl(int, float16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_m_impl_f32))) float32x4_t __arm_vcx2q_m_impl(int, float32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_f16))) uint8x16_t __arm_vcx2q_u8(int, float16x8_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2q_u8_f32))) uint8x16_t __arm_vcx2q_u8(int, float32x4_t, uint32_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_f16))) float16x8_t __arm_vcx2qa_impl(int, float16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_impl_f32))) float32x4_t __arm_vcx2qa_impl(int, float32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_f16))) float16x8_t __arm_vcx2qa_m_impl(int, float16x8_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx2qa_m_impl_f32))) float32x4_t __arm_vcx2qa_m_impl(int, float32x4_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_f16))) float16x8_t __arm_vcx3q_impl(int, float16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_impl_f32))) float32x4_t __arm_vcx3q_impl(int, float32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_f16))) float16x8_t __arm_vcx3q_m_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_m_impl_f32))) float32x4_t __arm_vcx3q_m_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_f16))) uint8x16_t __arm_vcx3q_u8_impl(int, float16x8_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3q_u8_impl_f32))) uint8x16_t __arm_vcx3q_u8_impl(int, float32x4_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_f16))) float16x8_t __arm_vcx3qa_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_impl_f32))) float32x4_t __arm_vcx3qa_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_f16))) float16x8_t __arm_vcx3qa_m_impl(int, float16x8_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_cde_vcx3qa_m_impl_f32))) float32x4_t __arm_vcx3qa_m_impl(int, float32x4_t, uint8x16_t, uint8x16_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t __arm_vreinterpretq_f16_u8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t __arm_vreinterpretq_f32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t __arm_vreinterpretq_u8(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t __arm_vreinterpretq_u8(float32x4_t); #endif /* __ARM_FEATURE_MVE & 2 */ #ifdef __cplusplus } /* extern "C" */ #endif #endif /* __ARM_CDE_H */ //===---- arm_cmse.h - Arm CMSE support -----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef __ARM_CMSE_H #define __ARM_CMSE_H #if (__ARM_FEATURE_CMSE & 0x1) #include #include #define __ARM_CMSE_SECURE_MODE (__ARM_FEATURE_CMSE & 0x2) #define CMSE_MPU_READWRITE 1 /* checks if readwrite_ok field is set */ #define CMSE_AU_NONSECURE 2 /* checks if permissions have secure field unset */ #define CMSE_MPU_UNPRIV 4 /* sets T flag on TT insrtuction */ #define CMSE_MPU_READ 8 /* checks if read_ok field is set */ #define CMSE_MPU_NONSECURE 16 /* sets A flag, checks if secure field unset */ #define CMSE_NONSECURE (CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE) #define cmse_check_pointed_object(p, f) \ cmse_check_address_range((p), sizeof(*(p)), (f)) #if defined(__cplusplus) extern "C" { #endif typedef union { struct cmse_address_info { #ifdef __ARM_BIG_ENDIAN /* __ARM_BIG_ENDIAN */ #if (__ARM_CMSE_SECURE_MODE) unsigned idau_region : 8; unsigned idau_region_valid : 1; unsigned secure : 1; unsigned nonsecure_readwrite_ok : 1; unsigned nonsecure_read_ok : 1; #else unsigned : 12; #endif unsigned readwrite_ok : 1; unsigned read_ok : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region_valid : 1; #else unsigned : 1; #endif unsigned mpu_region_valid : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region : 8; #else unsigned : 8; #endif unsigned mpu_region : 8; #else /* __ARM_LITTLE_ENDIAN */ unsigned mpu_region : 8; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region : 8; #else unsigned : 8; #endif unsigned mpu_region_valid : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned sau_region_valid : 1; #else unsigned : 1; #endif unsigned read_ok : 1; unsigned readwrite_ok : 1; #if (__ARM_CMSE_SECURE_MODE) unsigned nonsecure_read_ok : 1; unsigned nonsecure_readwrite_ok : 1; unsigned secure : 1; unsigned idau_region_valid : 1; unsigned idau_region : 8; #else unsigned : 12; #endif #endif /*__ARM_LITTLE_ENDIAN */ } flags; unsigned value; } 
cmse_address_info_t; static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TT(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TT(__p); return __u; } static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TTT(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TTT(__p); return __u; } #if __ARM_CMSE_SECURE_MODE static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TTA(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TTA(__p); return __u; } static cmse_address_info_t __attribute__((__always_inline__, __nodebug__)) cmse_TTAT(void *__p) { cmse_address_info_t __u; __u.value = __builtin_arm_cmse_TTAT(__p); return __u; } #endif #define cmse_TT_fptr(p) cmse_TT(__builtin_bit_cast(void *, (p))) #define cmse_TTT_fptr(p) cmse_TTT(__builtin_bit_cast(void *, (p))) #if __ARM_CMSE_SECURE_MODE #define cmse_TTA_fptr(p) cmse_TTA(__builtin_bit_cast(void *, (p))) #define cmse_TTAT_fptr(p) cmse_TTAT(__builtin_bit_cast(void *, (p))) #endif static void *__attribute__((__always_inline__)) cmse_check_address_range(void *__pb, size_t __s, int __flags) { uintptr_t __begin = (uintptr_t)__pb; uintptr_t __end = __begin + __s - 1; if (__end < __begin) return NULL; /* wrap around check */ /* Check whether the range crosses a 32-bytes aligned address */ const int __single_check = (__begin ^ __end) < 0x20u; /* execute the right variant of the TT instructions */ void *__pe = (void *)__end; cmse_address_info_t __permb, __perme; switch (__flags & (CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) { case 0: __permb = cmse_TT(__pb); __perme = __single_check ? __permb : cmse_TT(__pe); break; case CMSE_MPU_UNPRIV: __permb = cmse_TTT(__pb); __perme = __single_check ? __permb : cmse_TTT(__pe); break; #if __ARM_CMSE_SECURE_MODE case CMSE_MPU_NONSECURE: __permb = cmse_TTA(__pb); __perme = __single_check ? 
__permb : cmse_TTA(__pe); break; case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE: __permb = cmse_TTAT(__pb); __perme = __single_check ? __permb : cmse_TTAT(__pe); break; #endif /* if CMSE_NONSECURE is specified w/o __ARM_CMSE_SECURE_MODE */ default: return NULL; } /* check that the range does not cross MPU, SAU, or IDAU region boundaries */ if (__permb.value != __perme.value) return NULL; #if !(__ARM_CMSE_SECURE_MODE) /* CMSE_AU_NONSECURE is only supported when __ARM_FEATURE_CMSE & 0x2 */ if (__flags & CMSE_AU_NONSECURE) return NULL; #endif /* check the permission on the range */ switch (__flags & ~(CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE)) { #if (__ARM_CMSE_SECURE_MODE) case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE: case CMSE_MPU_READWRITE | CMSE_AU_NONSECURE: return __permb.flags.nonsecure_readwrite_ok ? __pb : NULL; case CMSE_MPU_READ | CMSE_AU_NONSECURE: return __permb.flags.nonsecure_read_ok ? __pb : NULL; case CMSE_AU_NONSECURE: return __permb.flags.secure ? NULL : __pb; #endif case CMSE_MPU_READ | CMSE_MPU_READWRITE: case CMSE_MPU_READWRITE: return __permb.flags.readwrite_ok ? __pb : NULL; case CMSE_MPU_READ: return __permb.flags.read_ok ? 
__pb : NULL; default: return NULL; } } #if __ARM_CMSE_SECURE_MODE static int __attribute__((__always_inline__, __nodebug__)) cmse_nonsecure_caller(void) { return !((uintptr_t)__builtin_return_address(0) & 1); } #define cmse_nsfptr_create(p) \ __builtin_bit_cast(__typeof__(p), \ (__builtin_bit_cast(uintptr_t, p) & ~(uintptr_t)1)) #define cmse_is_nsfptr(p) ((__builtin_bit_cast(uintptr_t, p) & 1) == 0) #endif /* __ARM_CMSE_SECURE_MODE */ void __attribute__((__noreturn__)) cmse_abort(void); #if defined(__cplusplus) } #endif #endif /* (__ARM_FEATURE_CMSE & 0x1) */ #endif /* __ARM_CMSE_H */ /*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* *===-----------------------------------------------------------------------=== */ #ifndef __ARM_FP16_H #define __ARM_FP16_H #include typedef __fp16 float16_t; #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #if defined(__aarch64__) #define vabdh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \ __ret; \ }) #define vabsh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \ __ret; \ }) #define vaddh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \ __ret; \ }) #define vcageh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \ __ret; \ }) #define vcagth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \ __ret; \ }) #define vcaleh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \ __ret; \ }) #define vcalth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \ __ret; \ }) #define vceqh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \ __ret; \ }) #define vceqzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \ __ret; \ }) #define vcgeh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ 
float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \ __ret; \ }) #define vcgezh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \ __ret; \ }) #define vcgth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \ __ret; \ }) #define vcgtzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \ __ret; \ }) #define vcleh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \ __ret; \ }) #define vclezh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \ __ret; \ }) #define vclth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \ __ret; \ }) #define vcltzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \ __ret; \ }) #define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_u32_f16(__p0, 
__p1) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \ __ret; \ }) #define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \ __ret; \ }) #define vcvth_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \ __ret; \ }) #define vcvth_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \ __ret; \ }) #define vcvth_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \ __ret; \ }) #define vcvth_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \ __ret; \ }) #define vcvth_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \ __ret; \ }) #define vcvth_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \ __ret; \ }) #define vcvtah_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \ __ret; \ }) #define vcvtah_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \ __ret; \ }) #define vcvtah_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \ __ret; \ }) #define vcvtah_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \ __ret; \ }) #define vcvtah_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ 
__ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \ __ret; \ }) #define vcvtah_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \ __ret; \ }) #define vcvth_f16_u16(__p0) __extension__ ({ \ float16_t __ret; \ uint16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__s0); \ __ret; \ }) #define vcvth_f16_s16(__p0) __extension__ ({ \ float16_t __ret; \ int16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__s0); \ __ret; \ }) #define vcvth_f16_u32(__p0) __extension__ ({ \ float16_t __ret; \ uint32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__s0); \ __ret; \ }) #define vcvth_f16_s32(__p0) __extension__ ({ \ float16_t __ret; \ int32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__s0); \ __ret; \ }) #define vcvth_f16_u64(__p0) __extension__ ({ \ float16_t __ret; \ uint64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__s0); \ __ret; \ }) #define vcvth_f16_s64(__p0) __extension__ ({ \ float16_t __ret; \ int64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__s0); \ __ret; \ }) #define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int32_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int64_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint16_t __s0 = __p0; \ __ret = 
(float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \ __ret; \ }) #define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \ __ret; \ }) #define vcvtmh_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \ __ret; \ }) #define vcvtmh_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \ __ret; \ }) #define vcvtmh_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \ __ret; \ }) #define vcvtmh_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \ __ret; \ }) #define vcvtmh_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \ __ret; \ }) #define vcvtmh_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \ __ret; \ }) #define vcvtnh_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \ __ret; \ }) #define vcvtnh_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \ __ret; \ }) #define vcvtnh_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \ __ret; \ }) #define vcvtnh_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \ __ret; \ }) #define vcvtnh_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \ __ret; 
\ }) #define vcvtnh_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \ __ret; \ }) #define vcvtph_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \ __ret; \ }) #define vcvtph_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \ __ret; \ }) #define vcvtph_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \ __ret; \ }) #define vcvtph_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \ __ret; \ }) #define vcvtph_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \ __ret; \ }) #define vcvtph_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \ __ret; \ }) #define vdivh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \ __ret; \ }) #define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \ __ret; \ }) #define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16_t __s2 = __p2; \ __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \ __ret; \ }) #define vmaxh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \ __ret; \ }) #define vmaxnmh_f16(__p0, __p1) 
__extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \ __ret; \ }) #define vminh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \ __ret; \ }) #define vminnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \ __ret; \ }) #define vmulh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \ __ret; \ }) #define vmulxh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \ __ret; \ }) #define vnegh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \ __ret; \ }) #define vrecpeh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \ __ret; \ }) #define vrecpsh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \ __ret; \ }) #define vrecpxh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \ __ret; \ }) #define vrndh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \ __ret; \ }) #define vrndah_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \ __ret; \ }) #define vrndih_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = 
(float16_t) __builtin_neon_vrndih_f16(__s0); \ __ret; \ }) #define vrndmh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \ __ret; \ }) #define vrndnh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \ __ret; \ }) #define vrndph_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \ __ret; \ }) #define vrndxh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \ __ret; \ }) #define vrsqrteh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \ __ret; \ }) #define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \ __ret; \ }) #define vsqrth_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \ __ret; \ }) #define vsubh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \ __ret; \ }) #endif #undef __ai #endif /* __ARM_FP16_H */ /*===---- arm_mve.h - ARM MVE intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_MVE_H #define __ARM_MVE_H #if !__ARM_FEATURE_MVE #error "MVE support not enabled" #endif #include #ifdef __cplusplus extern "C" { #endif typedef uint16_t mve_pred16_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) int16_t int16x8_t; typedef struct { int16x8_t val[2]; } int16x8x2_t; typedef struct { int16x8_t val[4]; } int16x8x4_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) int32_t int32x4_t; typedef struct { int32x4_t val[2]; } int32x4x2_t; typedef struct { int32x4_t val[4]; } int32x4x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) int64_t int64x2_t; typedef struct { int64x2_t val[2]; } int64x2x2_t; typedef struct { int64x2_t val[4]; } int64x2x4_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) int8_t int8x16_t; typedef struct { int8x16_t val[2]; } int8x16x2_t; typedef struct { int8x16_t val[4]; } int8x16x4_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) uint16_t uint16x8_t; typedef struct { uint16x8_t val[2]; } uint16x8x2_t; typedef struct { uint16x8_t val[4]; } uint16x8x4_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) uint32_t uint32x4_t; typedef struct { uint32x4_t val[2]; } uint32x4x2_t; typedef struct { uint32x4_t val[4]; } uint32x4x4_t; typedef __attribute__((__neon_vector_type__(2), __clang_arm_mve_strict_polymorphism)) uint64_t uint64x2_t; typedef struct { uint64x2_t val[2]; } uint64x2x2_t; typedef struct { uint64x2_t val[4]; } uint64x2x4_t; typedef __attribute__((__neon_vector_type__(16), __clang_arm_mve_strict_polymorphism)) uint8_t uint8x16_t; typedef struct { uint8x16_t val[2]; } uint8x16x2_t; typedef struct { uint8x16_t val[4]; } uint8x16x4_t; static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_asrl))) int64_t __arm_asrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_lsll))) uint64_t __arm_lsll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshr))) int32_t __arm_sqrshr(int32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl))) int64_t __arm_sqrshrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl_sat48))) int64_t __arm_sqrshrl_sat48(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshl))) int32_t __arm_sqshl(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshll))) int64_t __arm_sqshll(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshr))) int32_t __arm_srshr(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshrl))) int64_t __arm_srshrl(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshl))) uint32_t __arm_uqrshl(uint32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll))) uint64_t __arm_uqrshll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll_sat48))) uint64_t __arm_uqrshll_sat48(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshl))) uint32_t __arm_uqshl(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshll))) uint64_t __arm_uqshll(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshr))) uint32_t __arm_urshr(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshrl))) uint64_t 
__arm_urshrl(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t __arm_vabavq_p_s16(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t __arm_vabavq_p(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t __arm_vabavq_p_s32(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t __arm_vabavq_p(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t __arm_vabavq_p_s8(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t __arm_vabavq_p(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t __arm_vabavq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t __arm_vabavq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t __arm_vabavq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t __arm_vabavq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t __arm_vabavq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t __arm_vabavq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t __arm_vabavq_s16(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t __arm_vabavq(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t __arm_vabavq_s32(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t __arm_vabavq(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t __arm_vabavq_s8(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t __arm_vabavq(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t __arm_vabavq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t __arm_vabavq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t __arm_vabavq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t __arm_vabavq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t __arm_vabavq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) 
uint32_t __arm_vabavq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t __arm_vabdq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t __arm_vabdq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t __arm_vabdq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t __arm_vabdq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t __arm_vabdq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t __arm_vabdq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t __arm_vabdq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t __arm_vabdq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t __arm_vabdq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t __arm_vabdq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t __arm_vabdq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t __arm_vabdq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t __arm_vabdq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t __arm_vabdq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t __arm_vabdq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t __arm_vabdq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t __arm_vabdq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t __arm_vabdq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t __arm_vabdq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t __arm_vabdq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t __arm_vabdq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t __arm_vabdq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t __arm_vabdq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t __arm_vabdq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t __arm_vabdq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t __arm_vabdq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t __arm_vabdq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t __arm_vabdq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t __arm_vabdq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t __arm_vabdq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t __arm_vabdq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t __arm_vabdq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t __arm_vabdq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t __arm_vabdq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t __arm_vabdq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t __arm_vabdq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t __arm_vabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t __arm_vabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t __arm_vabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t __arm_vabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t __arm_vabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t __arm_vabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t __arm_vabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t __arm_vabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t __arm_vabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t __arm_vabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t __arm_vabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t __arm_vabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t __arm_vabsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t __arm_vabsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t __arm_vabsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t __arm_vabsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t __arm_vabsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t __arm_vabsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t __arm_vadciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t __arm_vadciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t __arm_vadciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t __arm_vadciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t __arm_vadciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t __arm_vadciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t __arm_vadciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t __arm_vadciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t __arm_vadcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t __arm_vadcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t __arm_vadcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t __arm_vadcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t __arm_vadcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t __arm_vadcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t __arm_vadcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t __arm_vadcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t __arm_vaddlvaq_p_s32(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t __arm_vaddlvaq_p(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t 
__arm_vaddlvaq_p_u32(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t __arm_vaddlvaq_p(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t __arm_vaddlvaq_s32(int64_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t __arm_vaddlvaq(int64_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t __arm_vaddlvaq_u32(uint64_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t __arm_vaddlvaq(uint64_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t __arm_vaddlvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t __arm_vaddlvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t __arm_vaddlvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t __arm_vaddlvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t __arm_vaddlvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t __arm_vaddlvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t __arm_vaddlvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t 
__arm_vaddlvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t __arm_vaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t __arm_vaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t __arm_vaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t __arm_vaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t __arm_vaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t __arm_vaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t __arm_vaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t __arm_vaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t __arm_vaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t __arm_vaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t __arm_vaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t __arm_vaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t __arm_vaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t __arm_vaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t __arm_vaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t __arm_vaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t __arm_vaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t __arm_vaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t __arm_vaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t __arm_vaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t __arm_vaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t __arm_vaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t __arm_vaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t __arm_vaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t __arm_vaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t __arm_vaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t __arm_vaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t __arm_vaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t __arm_vaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t __arm_vaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t __arm_vaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t __arm_vaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t __arm_vaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t __arm_vaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t __arm_vaddq_n_u8(uint8x16_t, uint8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t __arm_vaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t __arm_vaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t __arm_vaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t __arm_vaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t __arm_vaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t __arm_vaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t __arm_vaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t __arm_vaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t __arm_vaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t __arm_vaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t __arm_vaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t __arm_vaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t __arm_vaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) 
int16x8_t __arm_vaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t __arm_vaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t __arm_vaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t __arm_vaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t __arm_vaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t __arm_vaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t __arm_vaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t __arm_vaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t __arm_vaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t __arm_vaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t __arm_vaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t __arm_vaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t 
__arm_vaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t __arm_vaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t __arm_vaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t __arm_vaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t __arm_vaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t __arm_vaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t __arm_vaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t __arm_vaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t __arm_vaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t __arm_vaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t __arm_vaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t __arm_vaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t 
__arm_vaddvaq_p_s16(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t __arm_vaddvaq_p(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t __arm_vaddvaq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t __arm_vaddvaq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t __arm_vaddvaq_p_s8(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t __arm_vaddvaq_p(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t __arm_vaddvaq_p_u16(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t __arm_vaddvaq_p(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t __arm_vaddvaq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t __arm_vaddvaq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t __arm_vaddvaq_p_u8(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t __arm_vaddvaq_p(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t 
__arm_vaddvaq_s16(int32_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t __arm_vaddvaq(int32_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t __arm_vaddvaq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t __arm_vaddvaq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t __arm_vaddvaq_s8(int32_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t __arm_vaddvaq(int32_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t __arm_vaddvaq_u16(uint32_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t __arm_vaddvaq(uint32_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t __arm_vaddvaq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t __arm_vaddvaq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t __arm_vaddvaq_u8(uint32_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t __arm_vaddvaq(uint32_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t __arm_vaddvq_p_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t __arm_vaddvq_p(int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t __arm_vaddvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t __arm_vaddvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t __arm_vaddvq_p_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t __arm_vaddvq_p(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t __arm_vaddvq_p_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t __arm_vaddvq_p(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t __arm_vaddvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t __arm_vaddvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t __arm_vaddvq_p_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t __arm_vaddvq_p(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t __arm_vaddvq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t __arm_vaddvq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t __arm_vaddvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t __arm_vaddvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t __arm_vaddvq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t __arm_vaddvq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t __arm_vaddvq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t __arm_vaddvq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t __arm_vaddvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t __arm_vaddvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t __arm_vaddvq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t __arm_vaddvq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t __arm_vandq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t __arm_vandq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t __arm_vandq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t __arm_vandq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t 
__arm_vandq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t __arm_vandq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t __arm_vandq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t __arm_vandq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t __arm_vandq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t __arm_vandq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t __arm_vandq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t __arm_vandq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t __arm_vandq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t __arm_vandq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t __arm_vandq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t __arm_vandq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t 
__arm_vandq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t __arm_vandq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t __arm_vandq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t __arm_vandq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t __arm_vandq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t __arm_vandq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t __arm_vandq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t __arm_vandq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t __arm_vandq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t __arm_vandq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t __arm_vandq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t __arm_vandq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t __arm_vandq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t __arm_vandq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t __arm_vandq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t __arm_vandq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t __arm_vandq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t __arm_vandq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t __arm_vandq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t __arm_vandq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t __arm_vbicq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t __arm_vbicq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t __arm_vbicq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t __arm_vbicq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t __arm_vbicq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t __arm_vbicq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t __arm_vbicq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t __arm_vbicq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t __arm_vbicq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t __arm_vbicq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t __arm_vbicq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t __arm_vbicq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t __arm_vbicq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t __arm_vbicq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t __arm_vbicq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t __arm_vbicq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t 
__arm_vbicq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t __arm_vbicq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t __arm_vbicq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t __arm_vbicq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t __arm_vbicq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t __arm_vbicq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t __arm_vbicq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t __arm_vbicq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t __arm_vbicq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t __arm_vbicq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t __arm_vbicq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t __arm_vbicq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t __arm_vbicq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t __arm_vbicq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t __arm_vbicq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t __arm_vbicq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t __arm_vbicq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t __arm_vbicq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t __arm_vbicq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t __arm_vbicq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t __arm_vbicq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t __arm_vbicq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t __arm_vbicq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t __arm_vbicq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t __arm_vbicq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t __arm_vbicq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) 
int32x4_t __arm_vbicq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t __arm_vbicq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t __arm_vbicq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t __arm_vbicq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t __arm_vbicq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t __arm_vbicq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t __arm_vbicq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t __arm_vbicq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t __arm_vbicq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t __arm_vbicq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t __arm_vbrsrq_m_n_s16(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t __arm_vbrsrq_m(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t __arm_vbrsrq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t __arm_vbrsrq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t __arm_vbrsrq_m_n_s8(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t __arm_vbrsrq_m(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t __arm_vbrsrq_m_n_u16(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t __arm_vbrsrq_m(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t __arm_vbrsrq_m_n_u32(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t __arm_vbrsrq_m(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t __arm_vbrsrq_m_n_u8(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t __arm_vbrsrq_m(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t __arm_vbrsrq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t __arm_vbrsrq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t __arm_vbrsrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t __arm_vbrsrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t __arm_vbrsrq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t __arm_vbrsrq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t __arm_vbrsrq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t __arm_vbrsrq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t __arm_vbrsrq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t __arm_vbrsrq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t __arm_vbrsrq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t __arm_vbrsrq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t __arm_vbrsrq_x_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t __arm_vbrsrq_x(int16x8_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t __arm_vbrsrq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t __arm_vbrsrq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t __arm_vbrsrq_x_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t __arm_vbrsrq_x(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t __arm_vbrsrq_x_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t __arm_vbrsrq_x(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t __arm_vbrsrq_x_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t __arm_vbrsrq_x(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t __arm_vbrsrq_x_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t __arm_vbrsrq_x(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t __arm_vcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t 
__arm_vcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t __arm_vcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t __arm_vcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t __arm_vcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t __arm_vcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t __arm_vcaddq_rot270_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t __arm_vcaddq_rot270_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t __arm_vcaddq_rot270_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t __arm_vcaddq_rot270_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t __arm_vcaddq_rot270_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t __arm_vcaddq_rot270_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t __arm_vcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t __arm_vcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t __arm_vcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t __arm_vcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t __arm_vcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t __arm_vcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t __arm_vcaddq_rot270_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t __arm_vcaddq_rot270(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t __arm_vcaddq_rot270_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t __arm_vcaddq_rot270(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t __arm_vcaddq_rot270_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t __arm_vcaddq_rot270(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t __arm_vcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t __arm_vcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t __arm_vcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t __arm_vcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t __arm_vcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t __arm_vcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t __arm_vcaddq_rot270_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t __arm_vcaddq_rot270_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t __arm_vcaddq_rot270_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t __arm_vcaddq_rot270_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t __arm_vcaddq_rot270_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t __arm_vcaddq_rot270_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t __arm_vcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t __arm_vcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t __arm_vcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t __arm_vcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t __arm_vcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t __arm_vcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t __arm_vcaddq_rot90_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t __arm_vcaddq_rot90_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t __arm_vcaddq_rot90_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) 
uint32x4_t __arm_vcaddq_rot90_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t __arm_vcaddq_rot90_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t __arm_vcaddq_rot90_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t __arm_vcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t __arm_vcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t __arm_vcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t __arm_vcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t __arm_vcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t __arm_vcaddq_rot90(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t __arm_vcaddq_rot90_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t __arm_vcaddq_rot90(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t __arm_vcaddq_rot90_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t __arm_vcaddq_rot90(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t __arm_vcaddq_rot90_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t __arm_vcaddq_rot90(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t __arm_vcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t __arm_vcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t __arm_vcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t __arm_vcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t __arm_vcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t __arm_vcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t __arm_vcaddq_rot90_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t __arm_vcaddq_rot90_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t 
__arm_vcaddq_rot90_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t __arm_vcaddq_rot90_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t __arm_vcaddq_rot90_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t __arm_vcaddq_rot90_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t __arm_vclsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t __arm_vclsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t __arm_vclsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t __arm_vclsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t __arm_vclsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t __arm_vclsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t __arm_vclsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t __arm_vclsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t __arm_vclsq_s32(int32x4_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t __arm_vclsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t __arm_vclsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t __arm_vclsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t __arm_vclsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t __arm_vclsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t __arm_vclsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t __arm_vclsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t __arm_vclsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t __arm_vclsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t __arm_vclzq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t __arm_vclzq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t __arm_vclzq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t __arm_vclzq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t __arm_vclzq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t __arm_vclzq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t __arm_vclzq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t __arm_vclzq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t __arm_vclzq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t __arm_vclzq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t __arm_vclzq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t __arm_vclzq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t __arm_vclzq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t __arm_vclzq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t __arm_vclzq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t __arm_vclzq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t __arm_vclzq_s8(int8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t __arm_vclzq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t __arm_vclzq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t __arm_vclzq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t __arm_vclzq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t __arm_vclzq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t __arm_vclzq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t __arm_vclzq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t __arm_vclzq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t __arm_vclzq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t __arm_vclzq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t __arm_vclzq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t __arm_vclzq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t __arm_vclzq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t __arm_vclzq_x_u16(uint16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t __arm_vclzq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t __arm_vclzq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t __arm_vclzq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t __arm_vclzq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t __arm_vclzq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t __arm_vcmpcsq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t __arm_vcmpcsq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t __arm_vcmpcsq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t __arm_vcmpcsq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t __arm_vcmpcsq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t __arm_vcmpcsq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t __arm_vcmpcsq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t __arm_vcmpcsq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t __arm_vcmpcsq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t __arm_vcmpcsq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t __arm_vcmpcsq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t __arm_vcmpcsq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t __arm_vcmpcsq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t __arm_vcmpcsq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t __arm_vcmpcsq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t __arm_vcmpcsq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t __arm_vcmpcsq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t __arm_vcmpcsq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t __arm_vcmpcsq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t __arm_vcmpcsq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t __arm_vcmpcsq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t __arm_vcmpcsq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t __arm_vcmpcsq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t __arm_vcmpcsq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t __arm_vcmpeqq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t __arm_vcmpeqq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t __arm_vcmpeqq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t __arm_vcmpeqq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t __arm_vcmpeqq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t __arm_vcmpeqq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t __arm_vcmpeqq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t __arm_vcmpeqq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t __arm_vcmpeqq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t __arm_vcmpeqq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t __arm_vcmpeqq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t __arm_vcmpeqq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t __arm_vcmpeqq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t __arm_vcmpeqq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t __arm_vcmpeqq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t __arm_vcmpeqq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t __arm_vcmpeqq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t __arm_vcmpeqq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t __arm_vcmpeqq_m_u16(uint16x8_t, uint16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t __arm_vcmpeqq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t __arm_vcmpeqq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t __arm_vcmpeqq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t __arm_vcmpeqq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t __arm_vcmpeqq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t __arm_vcmpeqq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t __arm_vcmpeqq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t __arm_vcmpeqq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t __arm_vcmpeqq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t __arm_vcmpeqq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t __arm_vcmpeqq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t __arm_vcmpeqq_n_u16(uint16x8_t, uint16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t __arm_vcmpeqq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t __arm_vcmpeqq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t __arm_vcmpeqq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t __arm_vcmpeqq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t __arm_vcmpeqq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t __arm_vcmpeqq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t __arm_vcmpeqq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t __arm_vcmpeqq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t __arm_vcmpeqq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t __arm_vcmpeqq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t __arm_vcmpeqq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t __arm_vcmpeqq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t __arm_vcmpeqq(uint16x8_t, uint16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t __arm_vcmpeqq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t __arm_vcmpeqq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t __arm_vcmpeqq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t __arm_vcmpeqq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t __arm_vcmpgeq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t __arm_vcmpgeq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t __arm_vcmpgeq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t __arm_vcmpgeq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t __arm_vcmpgeq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t __arm_vcmpgeq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t __arm_vcmpgeq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t __arm_vcmpgeq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t __arm_vcmpgeq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t __arm_vcmpgeq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t __arm_vcmpgeq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t __arm_vcmpgeq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t __arm_vcmpgeq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t __arm_vcmpgeq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t __arm_vcmpgeq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t __arm_vcmpgeq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t __arm_vcmpgeq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t __arm_vcmpgeq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t __arm_vcmpgeq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t __arm_vcmpgeq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t 
__arm_vcmpgeq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t __arm_vcmpgeq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t __arm_vcmpgeq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t __arm_vcmpgeq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t __arm_vcmpgtq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t __arm_vcmpgtq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t __arm_vcmpgtq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t __arm_vcmpgtq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t __arm_vcmpgtq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t __arm_vcmpgtq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t __arm_vcmpgtq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t __arm_vcmpgtq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t 
__arm_vcmpgtq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t __arm_vcmpgtq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t __arm_vcmpgtq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t __arm_vcmpgtq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t __arm_vcmpgtq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t __arm_vcmpgtq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t __arm_vcmpgtq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t __arm_vcmpgtq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t __arm_vcmpgtq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t __arm_vcmpgtq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t __arm_vcmpgtq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t __arm_vcmpgtq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t __arm_vcmpgtq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t __arm_vcmpgtq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t __arm_vcmpgtq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t __arm_vcmpgtq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t __arm_vcmphiq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t __arm_vcmphiq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t __arm_vcmphiq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t __arm_vcmphiq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t __arm_vcmphiq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t __arm_vcmphiq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t __arm_vcmphiq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t __arm_vcmphiq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t __arm_vcmphiq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t __arm_vcmphiq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t __arm_vcmphiq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t __arm_vcmphiq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t __arm_vcmphiq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t __arm_vcmphiq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t __arm_vcmphiq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t __arm_vcmphiq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t __arm_vcmphiq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t __arm_vcmphiq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t __arm_vcmphiq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t __arm_vcmphiq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t __arm_vcmphiq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t __arm_vcmphiq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t __arm_vcmphiq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t __arm_vcmphiq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t __arm_vcmpleq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t __arm_vcmpleq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t __arm_vcmpleq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t __arm_vcmpleq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t __arm_vcmpleq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t __arm_vcmpleq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t __arm_vcmpleq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t __arm_vcmpleq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t __arm_vcmpleq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t __arm_vcmpleq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t __arm_vcmpleq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t __arm_vcmpleq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t __arm_vcmpleq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t __arm_vcmpleq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t __arm_vcmpleq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t __arm_vcmpleq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t __arm_vcmpleq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t __arm_vcmpleq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t __arm_vcmpleq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t __arm_vcmpleq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t __arm_vcmpleq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t 
__arm_vcmpleq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t __arm_vcmpleq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t __arm_vcmpleq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t __arm_vcmpltq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t __arm_vcmpltq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t __arm_vcmpltq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t __arm_vcmpltq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t __arm_vcmpltq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t __arm_vcmpltq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t __arm_vcmpltq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t __arm_vcmpltq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t __arm_vcmpltq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t 
__arm_vcmpltq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t __arm_vcmpltq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t __arm_vcmpltq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t __arm_vcmpltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t __arm_vcmpltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t __arm_vcmpltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t __arm_vcmpltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t __arm_vcmpltq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t __arm_vcmpltq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t __arm_vcmpltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t __arm_vcmpltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t __arm_vcmpltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t __arm_vcmpltq(int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t __arm_vcmpltq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t __arm_vcmpltq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t __arm_vcmpneq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t __arm_vcmpneq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t __arm_vcmpneq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t __arm_vcmpneq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t __arm_vcmpneq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t __arm_vcmpneq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t __arm_vcmpneq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t __arm_vcmpneq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t __arm_vcmpneq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t __arm_vcmpneq_m(uint32x4_t, uint32_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t __arm_vcmpneq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t __arm_vcmpneq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t __arm_vcmpneq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t __arm_vcmpneq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t __arm_vcmpneq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t __arm_vcmpneq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t __arm_vcmpneq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t __arm_vcmpneq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t __arm_vcmpneq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t __arm_vcmpneq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t __arm_vcmpneq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t __arm_vcmpneq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t __arm_vcmpneq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t __arm_vcmpneq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t __arm_vcmpneq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t __arm_vcmpneq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t __arm_vcmpneq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t __arm_vcmpneq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t __arm_vcmpneq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t __arm_vcmpneq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t __arm_vcmpneq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t __arm_vcmpneq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t __arm_vcmpneq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t 
__arm_vcmpneq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t __arm_vcmpneq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t __arm_vcmpneq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t __arm_vcmpneq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t __arm_vcmpneq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t __arm_vcmpneq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t __arm_vcmpneq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t __arm_vcmpneq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t __arm_vcmpneq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t __arm_vcmpneq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t __arm_vcmpneq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t __arm_vcmpneq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t __arm_vcmpneq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t 
__arm_vcmpneq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t __arm_vcmpneq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s16))) int16x8_t __arm_vcreateq_s16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s32))) int32x4_t __arm_vcreateq_s32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s64))) int64x2_t __arm_vcreateq_s64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s8))) int8x16_t __arm_vcreateq_s8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u16))) uint16x8_t __arm_vcreateq_u16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u32))) uint32x4_t __arm_vcreateq_u32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u64))) uint64x2_t __arm_vcreateq_u64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u8))) uint8x16_t __arm_vcreateq_u8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q))) mve_pred16_t __arm_vctp16q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q_m))) mve_pred16_t __arm_vctp16q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q))) mve_pred16_t __arm_vctp32q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q_m))) mve_pred16_t __arm_vctp32q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q))) mve_pred16_t 
__arm_vctp64q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q_m))) mve_pred16_t __arm_vctp64q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q))) mve_pred16_t __arm_vctp8q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q_m))) mve_pred16_t __arm_vctp8q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t __arm_vddupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t __arm_vddupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t __arm_vddupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t __arm_vddupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t __arm_vddupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t __arm_vddupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t __arm_vddupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t __arm_vddupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t __arm_vddupq_m_wb_u32(uint32x4_t, uint32_t *, int, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t __arm_vddupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t __arm_vddupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t __arm_vddupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t __arm_vddupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t __arm_vddupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t __arm_vddupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t __arm_vddupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t __arm_vddupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t __arm_vddupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t __arm_vddupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t __arm_vddupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t __arm_vddupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t 
__arm_vddupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t __arm_vddupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t __arm_vddupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t __arm_vddupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t __arm_vddupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t __arm_vddupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t __arm_vddupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t __arm_vddupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t __arm_vddupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t __arm_vddupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t __arm_vddupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t __arm_vddupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t __arm_vddupq_x_u32(uint32_t *, int, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t __arm_vddupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t __arm_vddupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t __arm_vdupq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t __arm_vdupq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t __arm_vdupq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t __arm_vdupq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t __arm_vdupq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t __arm_vdupq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t __arm_vdupq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t __arm_vdupq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t __arm_vdupq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t __arm_vdupq_m(uint32x4_t, uint32_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t __arm_vdupq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t __arm_vdupq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s16))) int16x8_t __arm_vdupq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s32))) int32x4_t __arm_vdupq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s8))) int8x16_t __arm_vdupq_n_s8(int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u16))) uint16x8_t __arm_vdupq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u32))) uint32x4_t __arm_vdupq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u8))) uint8x16_t __arm_vdupq_n_u8(uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s16))) int16x8_t __arm_vdupq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s32))) int32x4_t __arm_vdupq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s8))) int8x16_t __arm_vdupq_x_n_s8(int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u16))) uint16x8_t __arm_vdupq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u32))) uint32x4_t __arm_vdupq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u8))) uint8x16_t __arm_vdupq_x_n_u8(uint8_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t __arm_vdwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t __arm_vdwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t __arm_vdwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t __arm_vdwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t __arm_vdwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t __arm_vdwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t __arm_vdwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t __arm_vdwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t __arm_vdwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t __arm_vdwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t 
__arm_vdwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t __arm_vdwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t __arm_vdwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t __arm_vdwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t __arm_vdwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t __arm_vdwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t __arm_vdwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t __arm_vdwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t __arm_vdwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t __arm_vdwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t __arm_vdwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t __arm_vdwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t 
__arm_vdwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t __arm_vdwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t __arm_vdwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t __arm_vdwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t __arm_vdwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t __arm_vdwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t __arm_vdwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t __arm_vdwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t __arm_vdwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t __arm_vdwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t __arm_vdwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t __arm_vdwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t __arm_vdwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t __arm_vdwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t __arm_veorq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t __arm_veorq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t __arm_veorq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t __arm_veorq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t __arm_veorq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t __arm_veorq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t __arm_veorq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t __arm_veorq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t __arm_veorq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t __arm_veorq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t __arm_veorq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t __arm_veorq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t __arm_veorq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t __arm_veorq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t __arm_veorq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t __arm_veorq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t __arm_veorq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t __arm_veorq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t __arm_veorq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t __arm_veorq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t __arm_veorq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t __arm_veorq(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t __arm_veorq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t __arm_veorq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t __arm_veorq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t __arm_veorq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t __arm_veorq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t __arm_veorq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t __arm_veorq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t __arm_veorq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t __arm_veorq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t __arm_veorq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t __arm_veorq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t __arm_veorq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t __arm_veorq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t __arm_veorq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t __arm_vgetq_lane_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t __arm_vgetq_lane(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t __arm_vgetq_lane_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t __arm_vgetq_lane(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t __arm_vgetq_lane_s64(int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t __arm_vgetq_lane(int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t __arm_vgetq_lane_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t __arm_vgetq_lane(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t __arm_vgetq_lane_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t __arm_vgetq_lane(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t __arm_vgetq_lane_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t __arm_vgetq_lane(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t __arm_vgetq_lane_u64(uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t __arm_vgetq_lane(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t __arm_vgetq_lane_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t __arm_vgetq_lane(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t __arm_vhaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t __arm_vhaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t __arm_vhaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t __arm_vhaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t __arm_vhaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t __arm_vhaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t __arm_vhaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t __arm_vhaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t __arm_vhaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t __arm_vhaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t __arm_vhaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t __arm_vhaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t __arm_vhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t __arm_vhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t __arm_vhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t __arm_vhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t __arm_vhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t __arm_vhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t __arm_vhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t __arm_vhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t __arm_vhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t __arm_vhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t __arm_vhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t __arm_vhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t __arm_vhaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t __arm_vhaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t __arm_vhaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t __arm_vhaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t __arm_vhaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t __arm_vhaddq(int8x16_t, int8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t __arm_vhaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t __arm_vhaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t __arm_vhaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t __arm_vhaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t __arm_vhaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t __arm_vhaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t __arm_vhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t __arm_vhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t __arm_vhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t __arm_vhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t __arm_vhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t __arm_vhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t __arm_vhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t __arm_vhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t __arm_vhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t __arm_vhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t __arm_vhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t __arm_vhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t __arm_vhaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t __arm_vhaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t __arm_vhaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t __arm_vhaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t __arm_vhaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t __arm_vhaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t __arm_vhaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t 
__arm_vhaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t __arm_vhaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t __arm_vhaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t __arm_vhaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t __arm_vhaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t __arm_vhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t __arm_vhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t __arm_vhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t __arm_vhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t __arm_vhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t __arm_vhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t __arm_vhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) 
uint16x8_t __arm_vhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t __arm_vhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t __arm_vhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t __arm_vhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t __arm_vhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t __arm_vhcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t __arm_vhcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t __arm_vhcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t __arm_vhcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t __arm_vhcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t __arm_vhcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) 
int16x8_t __arm_vhcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t __arm_vhcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t __arm_vhcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t __arm_vhcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t __arm_vhcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t __arm_vhcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t __arm_vhcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t __arm_vhcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t __arm_vhcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t __arm_vhcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t __arm_vhcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t __arm_vhcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t __arm_vhcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t __arm_vhcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t __arm_vhcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t __arm_vhcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t __arm_vhcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t __arm_vhcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t __arm_vhcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t __arm_vhcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t __arm_vhcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t __arm_vhcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t __arm_vhcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t __arm_vhcaddq_rot90(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t __arm_vhcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t __arm_vhcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t __arm_vhcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t __arm_vhcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t __arm_vhcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t __arm_vhcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t __arm_vhsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t __arm_vhsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t __arm_vhsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t __arm_vhsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) 
int8x16_t __arm_vhsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t __arm_vhsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t __arm_vhsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t __arm_vhsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t __arm_vhsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t __arm_vhsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t __arm_vhsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t __arm_vhsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t __arm_vhsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t __arm_vhsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t __arm_vhsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t 
__arm_vhsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t __arm_vhsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t __arm_vhsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t __arm_vhsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t __arm_vhsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t __arm_vhsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t __arm_vhsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t __arm_vhsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t __arm_vhsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t __arm_vhsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t __arm_vhsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t __arm_vhsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t __arm_vhsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t __arm_vhsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t __arm_vhsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t __arm_vhsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t __arm_vhsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t __arm_vhsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t __arm_vhsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t __arm_vhsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t __arm_vhsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t __arm_vhsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t __arm_vhsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t __arm_vhsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t __arm_vhsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t 
__arm_vhsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t __arm_vhsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t __arm_vhsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t __arm_vhsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t __arm_vhsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t __arm_vhsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t __arm_vhsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t __arm_vhsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t __arm_vhsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t __arm_vhsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t __arm_vhsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t __arm_vhsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t __arm_vhsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t __arm_vhsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t __arm_vhsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t __arm_vhsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t __arm_vhsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t __arm_vhsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t __arm_vhsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t __arm_vhsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t __arm_vhsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t __arm_vhsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t __arm_vhsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t __arm_vhsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t __arm_vhsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t __arm_vhsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t __arm_vhsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t __arm_vhsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t __arm_vhsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t __arm_vhsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t __arm_vhsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t __arm_vhsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t __arm_vidupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t __arm_vidupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t __arm_vidupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t __arm_vidupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t 
__arm_vidupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t __arm_vidupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t __arm_vidupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t __arm_vidupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t __arm_vidupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t __arm_vidupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t __arm_vidupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t __arm_vidupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t __arm_vidupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t __arm_vidupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t __arm_vidupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t __arm_vidupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) 
uint8x16_t __arm_vidupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t __arm_vidupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t __arm_vidupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t __arm_vidupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t __arm_vidupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t __arm_vidupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t __arm_vidupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t __arm_vidupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t __arm_vidupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t __arm_vidupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t __arm_vidupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t __arm_vidupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t __arm_vidupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t __arm_vidupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t __arm_vidupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t __arm_vidupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t __arm_vidupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t __arm_vidupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t __arm_vidupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t __arm_vidupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t __arm_viwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t __arm_viwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t __arm_viwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t __arm_viwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t 
__arm_viwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t __arm_viwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t __arm_viwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t __arm_viwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t __arm_viwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t __arm_viwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t __arm_viwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t __arm_viwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t __arm_viwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t __arm_viwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t __arm_viwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t 
__arm_viwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t __arm_viwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t __arm_viwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t __arm_viwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t __arm_viwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t __arm_viwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t __arm_viwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t __arm_viwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t __arm_viwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t __arm_viwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t __arm_viwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t __arm_viwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t 
__arm_viwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t __arm_viwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t __arm_viwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t __arm_viwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t __arm_viwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t __arm_viwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t __arm_viwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t __arm_viwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t __arm_viwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t __arm_vld1q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t __arm_vld1q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t __arm_vld1q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t __arm_vld1q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t __arm_vld1q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t __arm_vld1q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t __arm_vld1q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t __arm_vld1q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t __arm_vld1q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t __arm_vld1q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t __arm_vld1q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t __arm_vld1q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t __arm_vld1q_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t __arm_vld1q_z(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t __arm_vld1q_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t __arm_vld1q_z(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t __arm_vld1q_z_s8(const int8_t *, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t __arm_vld1q_z(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t __arm_vld1q_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t __arm_vld1q_z(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t __arm_vld1q_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t __arm_vld1q_z(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t __arm_vld1q_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t __arm_vld1q_z(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t __arm_vld2q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t __arm_vld2q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t __arm_vld2q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t __arm_vld2q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t __arm_vld2q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t __arm_vld2q(const int8_t *); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t __arm_vld2q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t __arm_vld2q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t __arm_vld2q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t __arm_vld2q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t __arm_vld2q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t __arm_vld2q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t __arm_vld4q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t __arm_vld4q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t __arm_vld4q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t __arm_vld4q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t __arm_vld4q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t __arm_vld4q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t __arm_vld4q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t __arm_vld4q(const uint16_t 
*); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t __arm_vld4q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t __arm_vld4q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t __arm_vld4q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t __arm_vld4q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t __arm_vldrbq_gather_offset_s16(const int8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t __arm_vldrbq_gather_offset(const int8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t __arm_vldrbq_gather_offset_s32(const int8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t __arm_vldrbq_gather_offset(const int8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t __arm_vldrbq_gather_offset_s8(const int8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t __arm_vldrbq_gather_offset(const int8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t __arm_vldrbq_gather_offset_u16(const uint8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t 
__arm_vldrbq_gather_offset(const uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t __arm_vldrbq_gather_offset_u32(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t __arm_vldrbq_gather_offset(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t __arm_vldrbq_gather_offset_u8(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t __arm_vldrbq_gather_offset(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t __arm_vldrbq_gather_offset_z_s16(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t __arm_vldrbq_gather_offset_z(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t __arm_vldrbq_gather_offset_z_s32(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t __arm_vldrbq_gather_offset_z(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t __arm_vldrbq_gather_offset_z_s8(const int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t __arm_vldrbq_gather_offset_z(const int8_t *, uint8x16_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t __arm_vldrbq_gather_offset_z_u16(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t __arm_vldrbq_gather_offset_z_u32(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t __arm_vldrbq_gather_offset_z_u8(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t __arm_vldrbq_gather_offset_z(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s16))) int16x8_t __arm_vldrbq_s16(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s32))) int32x4_t __arm_vldrbq_s32(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s8))) int8x16_t __arm_vldrbq_s8(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u16))) uint16x8_t __arm_vldrbq_u16(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u32))) uint32x4_t __arm_vldrbq_u32(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u8))) uint8x16_t 
__arm_vldrbq_u8(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s16))) int16x8_t __arm_vldrbq_z_s16(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s32))) int32x4_t __arm_vldrbq_z_s32(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s8))) int8x16_t __arm_vldrbq_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u16))) uint16x8_t __arm_vldrbq_z_u16(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u32))) uint32x4_t __arm_vldrbq_z_u32(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u8))) uint8x16_t __arm_vldrbq_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_s64))) int64x2_t __arm_vldrdq_gather_base_s64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_u64))) uint64x2_t __arm_vldrdq_gather_base_u64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_s64))) int64x2_t __arm_vldrdq_gather_base_wb_s64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_u64))) uint64x2_t __arm_vldrdq_gather_base_wb_u64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_s64))) int64x2_t __arm_vldrdq_gather_base_wb_z_s64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_u64))) uint64x2_t __arm_vldrdq_gather_base_wb_z_u64(uint64x2_t *, int, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_s64))) int64x2_t __arm_vldrdq_gather_base_z_s64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_u64))) uint64x2_t __arm_vldrdq_gather_base_z_u64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t __arm_vldrdq_gather_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t __arm_vldrdq_gather_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t __arm_vldrdq_gather_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t __arm_vldrdq_gather_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t __arm_vldrdq_gather_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t __arm_vldrdq_gather_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t __arm_vldrdq_gather_shifted_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t __arm_vldrdq_gather_shifted_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t __arm_vldrhq_gather_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t __arm_vldrhq_gather_offset(const 
int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t __arm_vldrhq_gather_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t __arm_vldrhq_gather_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t __arm_vldrhq_gather_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t __arm_vldrhq_gather_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t __arm_vldrhq_gather_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t __arm_vldrhq_gather_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t __arm_vldrhq_gather_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t __arm_vldrhq_gather_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t __arm_vldrhq_gather_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t __arm_vldrhq_gather_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset(const uint16_t *, uint16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t __arm_vldrhq_gather_shifted_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrhq_gather_shifted_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t __arm_vldrhq_gather_shifted_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrhq_gather_shifted_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s16))) int16x8_t __arm_vldrhq_s16(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s32))) int32x4_t __arm_vldrhq_s32(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u16))) uint16x8_t __arm_vldrhq_u16(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u32))) uint32x4_t __arm_vldrhq_u32(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s16))) int16x8_t __arm_vldrhq_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s32))) int32x4_t __arm_vldrhq_z_s32(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u16))) uint16x8_t __arm_vldrhq_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u32))) uint32x4_t __arm_vldrhq_z_u32(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_s32))) int32x4_t __arm_vldrwq_gather_base_s32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_u32))) uint32x4_t __arm_vldrwq_gather_base_u32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_s32))) int32x4_t __arm_vldrwq_gather_base_wb_s32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_u32))) uint32x4_t __arm_vldrwq_gather_base_wb_u32(uint32x4_t *, 
int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_s32))) int32x4_t __arm_vldrwq_gather_base_wb_z_s32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_u32))) uint32x4_t __arm_vldrwq_gather_base_wb_z_u32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_s32))) int32x4_t __arm_vldrwq_gather_base_z_s32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_u32))) uint32x4_t __arm_vldrwq_gather_base_z_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t __arm_vldrwq_gather_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t __arm_vldrwq_gather_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t __arm_vldrwq_gather_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t __arm_vldrwq_gather_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t __arm_vldrwq_gather_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t __arm_vldrwq_gather_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t 
__arm_vldrwq_gather_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t __arm_vldrwq_gather_shifted_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t __arm_vldrwq_gather_shifted_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_s32))) int32x4_t __arm_vldrwq_s32(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_u32))) uint32x4_t __arm_vldrwq_u32(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_s32))) int32x4_t __arm_vldrwq_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_u32))) uint32x4_t __arm_vldrwq_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t __arm_vmaxaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t __arm_vmaxaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t __arm_vmaxaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t __arm_vmaxaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t __arm_vmaxaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t __arm_vmaxaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t __arm_vmaxaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t __arm_vmaxaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t 
__arm_vmaxaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t __arm_vmaxaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t __arm_vmaxaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t __arm_vmaxaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t __arm_vmaxavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t __arm_vmaxavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t __arm_vmaxavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t __arm_vmaxavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t __arm_vmaxavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t __arm_vmaxavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t __arm_vmaxavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t __arm_vmaxavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t __arm_vmaxavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t __arm_vmaxavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t __arm_vmaxavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t __arm_vmaxavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t __arm_vmaxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t __arm_vmaxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t __arm_vmaxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t __arm_vmaxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t __arm_vmaxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t __arm_vmaxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t __arm_vmaxq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t __arm_vmaxq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t __arm_vmaxq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t __arm_vmaxq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t __arm_vmaxq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t __arm_vmaxq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t __arm_vmaxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t __arm_vmaxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t __arm_vmaxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t __arm_vmaxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t __arm_vmaxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t __arm_vmaxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t __arm_vmaxq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t __arm_vmaxq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t __arm_vmaxq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t __arm_vmaxq(uint32x4_t, uint32x4_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t __arm_vmaxq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t __arm_vmaxq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t __arm_vmaxq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t __arm_vmaxq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t __arm_vmaxq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t __arm_vmaxq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t __arm_vmaxq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t __arm_vmaxq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t __arm_vmaxq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t __arm_vmaxq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t __arm_vmaxq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t __arm_vmaxq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t __arm_vmaxq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t __arm_vmaxq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t __arm_vmaxvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t __arm_vmaxvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t __arm_vmaxvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t __arm_vmaxvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t __arm_vmaxvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t __arm_vmaxvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t __arm_vmaxvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t __arm_vmaxvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t __arm_vmaxvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t __arm_vmaxvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t __arm_vmaxvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t __arm_vmaxvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t __arm_vmaxvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t __arm_vmaxvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t __arm_vmaxvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t __arm_vmaxvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t __arm_vmaxvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t __arm_vmaxvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t __arm_vmaxvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t __arm_vmaxvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t __arm_vmaxvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t __arm_vmaxvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t __arm_vmaxvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) 
uint8_t __arm_vmaxvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t __arm_vminaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t __arm_vminaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t __arm_vminaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t __arm_vminaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t __arm_vminaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t __arm_vminaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t __arm_vminaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t __arm_vminaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t __arm_vminaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t __arm_vminaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t __arm_vminaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t __arm_vminaq(uint8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t __arm_vminavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t __arm_vminavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t __arm_vminavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t __arm_vminavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t __arm_vminavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t __arm_vminavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t __arm_vminavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t __arm_vminavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t __arm_vminavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t __arm_vminavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t __arm_vminavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t __arm_vminavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t __arm_vminq_m_s16(int16x8_t, int16x8_t, 
int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t __arm_vminq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t __arm_vminq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t __arm_vminq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t __arm_vminq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t __arm_vminq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t __arm_vminq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t __arm_vminq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t __arm_vminq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t __arm_vminq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t __arm_vminq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t __arm_vminq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t __arm_vminq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t __arm_vminq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t __arm_vminq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t __arm_vminq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t __arm_vminq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t __arm_vminq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t __arm_vminq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t __arm_vminq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t __arm_vminq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t __arm_vminq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t __arm_vminq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t __arm_vminq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t __arm_vminq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t __arm_vminq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t __arm_vminq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t __arm_vminq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t __arm_vminq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t __arm_vminq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t __arm_vminq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t __arm_vminq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t __arm_vminq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t __arm_vminq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t __arm_vminq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t __arm_vminq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t __arm_vminvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t __arm_vminvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t __arm_vminvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t __arm_vminvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t __arm_vminvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t __arm_vminvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t __arm_vminvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t __arm_vminvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t __arm_vminvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t __arm_vminvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t __arm_vminvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t __arm_vminvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t __arm_vminvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t 
__arm_vminvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t __arm_vminvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t __arm_vminvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t __arm_vminvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t __arm_vminvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t __arm_vminvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t __arm_vminvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t __arm_vminvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t __arm_vminvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t __arm_vminvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t __arm_vminvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t __arm_vmladavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t __arm_vmladavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t __arm_vmladavaq_p_s32(int32_t, int32x4_t, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t __arm_vmladavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t __arm_vmladavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t __arm_vmladavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t __arm_vmladavaq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t __arm_vmladavaq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t __arm_vmladavaq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t __arm_vmladavaq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t __arm_vmladavaq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t __arm_vmladavaq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t __arm_vmladavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t __arm_vmladavaq(int32_t, int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t __arm_vmladavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t __arm_vmladavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t __arm_vmladavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t __arm_vmladavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t __arm_vmladavaq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t __arm_vmladavaq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t __arm_vmladavaq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t __arm_vmladavaq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t __arm_vmladavaq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t __arm_vmladavaq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t __arm_vmladavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t __arm_vmladavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t __arm_vmladavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t __arm_vmladavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t __arm_vmladavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t __arm_vmladavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t __arm_vmladavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t __arm_vmladavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t __arm_vmladavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t __arm_vmladavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t __arm_vmladavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t __arm_vmladavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t __arm_vmladavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t __arm_vmladavq_p(int16x8_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t __arm_vmladavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t __arm_vmladavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t __arm_vmladavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t __arm_vmladavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t __arm_vmladavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t __arm_vmladavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t __arm_vmladavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t __arm_vmladavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t __arm_vmladavq_p_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t __arm_vmladavq_p(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t __arm_vmladavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t __arm_vmladavq(int16x8_t, 
int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t __arm_vmladavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t __arm_vmladavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t __arm_vmladavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t __arm_vmladavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t __arm_vmladavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t __arm_vmladavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t __arm_vmladavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t __arm_vmladavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t __arm_vmladavq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t __arm_vmladavq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t __arm_vmladavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t __arm_vmladavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t 
__arm_vmladavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t __arm_vmladavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t __arm_vmladavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t __arm_vmladavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t __arm_vmladavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t __arm_vmladavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t __arm_vmladavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t __arm_vmladavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t __arm_vmladavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t __arm_vmladavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t __arm_vmlaldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t __arm_vmlaldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t __arm_vmlaldavaq_p_s32(int64_t, int32x4_t, 
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t __arm_vmlaldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t __arm_vmlaldavaq_p_u16(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t __arm_vmlaldavaq_p(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t __arm_vmlaldavaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t __arm_vmlaldavaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t __arm_vmlaldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t __arm_vmlaldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t __arm_vmlaldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t __arm_vmlaldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t __arm_vmlaldavaq_u16(uint64_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t __arm_vmlaldavaq(uint64_t, uint16x8_t, uint16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t __arm_vmlaldavaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t __arm_vmlaldavaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t __arm_vmlaldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t __arm_vmlaldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t __arm_vmlaldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t __arm_vmlaldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t __arm_vmlaldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t __arm_vmlaldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t __arm_vmlaldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t __arm_vmlaldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t __arm_vmlaldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t 
__arm_vmlaldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t __arm_vmlaldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t __arm_vmlaldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t __arm_vmlaldavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t __arm_vmlaldavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t __arm_vmlaldavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t __arm_vmlaldavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t __arm_vmlaldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t __arm_vmlaldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t __arm_vmlaldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t __arm_vmlaldavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t __arm_vmlaldavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t __arm_vmlaldavq(uint16x8_t, 
uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t __arm_vmlaldavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t __arm_vmlaldavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t __arm_vmlaldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t __arm_vmlaldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t __arm_vmlaldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t __arm_vmlaldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t __arm_vmlaldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t __arm_vmlaldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t __arm_vmlaldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t __arm_vmlaldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t __arm_vmlaq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t __arm_vmlaq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t __arm_vmlaq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t __arm_vmlaq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t __arm_vmlaq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t __arm_vmlaq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t __arm_vmlaq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t __arm_vmlaq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t __arm_vmlaq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t __arm_vmlaq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t __arm_vmlaq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t __arm_vmlaq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t __arm_vmlaq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t __arm_vmlaq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t __arm_vmlaq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t __arm_vmlaq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t __arm_vmlaq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t __arm_vmlaq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t __arm_vmlaq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t __arm_vmlaq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t __arm_vmlaq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t __arm_vmlaq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t __arm_vmlaq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t __arm_vmlaq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t __arm_vmlasq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) 
int16x8_t __arm_vmlasq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t __arm_vmlasq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t __arm_vmlasq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t __arm_vmlasq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t __arm_vmlasq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t __arm_vmlasq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t __arm_vmlasq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t __arm_vmlasq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t __arm_vmlasq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t __arm_vmlasq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t __arm_vmlasq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t __arm_vmlasq_n_s16(int16x8_t, 
int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t __arm_vmlasq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t __arm_vmlasq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t __arm_vmlasq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t __arm_vmlasq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t __arm_vmlasq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t __arm_vmlasq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t __arm_vmlasq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t __arm_vmlasq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t __arm_vmlasq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t __arm_vmlasq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t __arm_vmlasq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t __arm_vmlsdavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t __arm_vmlsdavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t __arm_vmlsdavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t __arm_vmlsdavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t __arm_vmlsdavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t __arm_vmlsdavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t __arm_vmlsdavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t __arm_vmlsdavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t __arm_vmlsdavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t __arm_vmlsdavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t __arm_vmlsdavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t __arm_vmlsdavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t __arm_vmlsdavaxq_p_s16(int32_t, int16x8_t, 
int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t __arm_vmlsdavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t __arm_vmlsdavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t __arm_vmlsdavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t __arm_vmlsdavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t __arm_vmlsdavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t __arm_vmlsdavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t __arm_vmlsdavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t __arm_vmlsdavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t __arm_vmlsdavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t __arm_vmlsdavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t __arm_vmlsdavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t __arm_vmlsdavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t __arm_vmlsdavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t __arm_vmlsdavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t __arm_vmlsdavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t __arm_vmlsdavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t __arm_vmlsdavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t __arm_vmlsdavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t __arm_vmlsdavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t __arm_vmlsdavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t __arm_vmlsdavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t __arm_vmlsdavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t __arm_vmlsdavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t 
__arm_vmlsdavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t __arm_vmlsdavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t __arm_vmlsdavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t __arm_vmlsdavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t __arm_vmlsdavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t __arm_vmlsdavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t __arm_vmlsdavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t __arm_vmlsdavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t __arm_vmlsdavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t __arm_vmlsdavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t __arm_vmlsdavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t __arm_vmlsdavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t __arm_vmlsldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t __arm_vmlsldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t __arm_vmlsldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t __arm_vmlsldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t __arm_vmlsldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t __arm_vmlsldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t __arm_vmlsldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t __arm_vmlsldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t __arm_vmlsldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t __arm_vmlsldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t __arm_vmlsldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t __arm_vmlsldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t __arm_vmlsldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t __arm_vmlsldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t __arm_vmlsldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t __arm_vmlsldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t __arm_vmlsldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t __arm_vmlsldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t __arm_vmlsldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t __arm_vmlsldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t __arm_vmlsldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t __arm_vmlsldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t __arm_vmlsldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t __arm_vmlsldavq(int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t __arm_vmlsldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t __arm_vmlsldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t __arm_vmlsldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t __arm_vmlsldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t __arm_vmlsldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t __arm_vmlsldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t __arm_vmlsldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t __arm_vmlsldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t __arm_vmovlbq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t __arm_vmovlbq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t __arm_vmovlbq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t __arm_vmovlbq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t __arm_vmovlbq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t __arm_vmovlbq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t __arm_vmovlbq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t __arm_vmovlbq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t __arm_vmovlbq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t __arm_vmovlbq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t __arm_vmovlbq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t __arm_vmovlbq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t __arm_vmovlbq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t __arm_vmovlbq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t __arm_vmovlbq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t __arm_vmovlbq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t __arm_vmovlbq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t __arm_vmovlbq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t __arm_vmovlbq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t __arm_vmovlbq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t __arm_vmovlbq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t __arm_vmovlbq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t __arm_vmovlbq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t __arm_vmovlbq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t __arm_vmovltq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t __arm_vmovltq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t __arm_vmovltq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t __arm_vmovltq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t __arm_vmovltq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t __arm_vmovltq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t __arm_vmovltq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t __arm_vmovltq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t __arm_vmovltq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t __arm_vmovltq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t __arm_vmovltq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t __arm_vmovltq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t __arm_vmovltq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t __arm_vmovltq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t __arm_vmovltq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t __arm_vmovltq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t __arm_vmovltq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t __arm_vmovltq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t 
__arm_vmovltq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t __arm_vmovltq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t __arm_vmovltq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t __arm_vmovltq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t __arm_vmovltq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t __arm_vmovltq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t __arm_vmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t __arm_vmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t __arm_vmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t __arm_vmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t __arm_vmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t __arm_vmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t __arm_vmovnbq_m_u32(uint16x8_t, 
uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t __arm_vmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t __arm_vmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t __arm_vmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t __arm_vmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t __arm_vmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t __arm_vmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t __arm_vmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t __arm_vmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t __arm_vmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t __arm_vmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t __arm_vmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t __arm_vmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t __arm_vmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t __arm_vmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t __arm_vmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t __arm_vmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t __arm_vmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t __arm_vmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t __arm_vmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t __arm_vmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t __arm_vmovntq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t __arm_vmovntq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t __arm_vmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t __arm_vmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t 
__arm_vmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t __arm_vmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t __arm_vmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t __arm_vmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t __arm_vmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t __arm_vmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t __arm_vmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t __arm_vmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t __arm_vmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t __arm_vmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t __arm_vmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t __arm_vmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t __arm_vmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t __arm_vmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t __arm_vmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t __arm_vmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t __arm_vmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t __arm_vmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t __arm_vmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t __arm_vmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t __arm_vmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t __arm_vmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t __arm_vmulhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t __arm_vmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t __arm_vmulhq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t __arm_vmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t __arm_vmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t __arm_vmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t __arm_vmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t __arm_vmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t __arm_vmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t __arm_vmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t __arm_vmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t __arm_vmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t __arm_vmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t __arm_vmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t __arm_vmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t __arm_vmullbq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t __arm_vmullbq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t __arm_vmullbq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t __arm_vmullbq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t __arm_vmullbq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t __arm_vmullbq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t __arm_vmullbq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t __arm_vmullbq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t __arm_vmullbq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t __arm_vmullbq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t 
__arm_vmullbq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t __arm_vmullbq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t __arm_vmullbq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t __arm_vmullbq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t __arm_vmullbq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t __arm_vmullbq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t __arm_vmullbq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t __arm_vmullbq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t __arm_vmullbq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t __arm_vmullbq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t __arm_vmullbq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t __arm_vmullbq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t __arm_vmullbq_int_u8(uint8x16_t, 
uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t __arm_vmullbq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t __arm_vmullbq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t __arm_vmullbq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t __arm_vmullbq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t __arm_vmullbq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t __arm_vmullbq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t __arm_vmullbq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t __arm_vmullbq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t __arm_vmullbq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t __arm_vmullbq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t __arm_vmullbq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t __arm_vmullbq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t __arm_vmullbq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t __arm_vmullbq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t __arm_vmullbq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t __arm_vmullbq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t __arm_vmullbq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t __arm_vmullbq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t __arm_vmullbq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t __arm_vmullbq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t __arm_vmullbq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t __arm_vmullbq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t __arm_vmullbq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t __arm_vmullbq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t __arm_vmullbq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t __arm_vmulltq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t __arm_vmulltq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t __arm_vmulltq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t __arm_vmulltq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t __arm_vmulltq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t __arm_vmulltq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t __arm_vmulltq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t __arm_vmulltq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t __arm_vmulltq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t __arm_vmulltq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t __arm_vmulltq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t __arm_vmulltq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t __arm_vmulltq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t __arm_vmulltq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t __arm_vmulltq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t __arm_vmulltq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t __arm_vmulltq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t __arm_vmulltq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t __arm_vmulltq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t 
__arm_vmulltq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t __arm_vmulltq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t __arm_vmulltq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t __arm_vmulltq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t __arm_vmulltq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t __arm_vmulltq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t __arm_vmulltq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t __arm_vmulltq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t __arm_vmulltq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t __arm_vmulltq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t __arm_vmulltq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t __arm_vmulltq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t __arm_vmulltq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t __arm_vmulltq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t __arm_vmulltq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t __arm_vmulltq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t __arm_vmulltq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t __arm_vmulltq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t __arm_vmulltq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t __arm_vmulltq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t __arm_vmulltq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t __arm_vmulltq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t __arm_vmulltq_poly(uint16x8_t, uint16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t __arm_vmulltq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t __arm_vmulltq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t __arm_vmulltq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t __arm_vmulltq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t __arm_vmulltq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t __arm_vmulltq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t __arm_vmulq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t __arm_vmulq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t __arm_vmulq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t __arm_vmulq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t __arm_vmulq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t __arm_vmulq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t __arm_vmulq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t __arm_vmulq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t __arm_vmulq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t __arm_vmulq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t __arm_vmulq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t __arm_vmulq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t __arm_vmulq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t __arm_vmulq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t __arm_vmulq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t __arm_vmulq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) 
int8x16_t __arm_vmulq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t __arm_vmulq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t __arm_vmulq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t __arm_vmulq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t __arm_vmulq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t __arm_vmulq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t __arm_vmulq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t __arm_vmulq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t __arm_vmulq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t __arm_vmulq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t __arm_vmulq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t __arm_vmulq(int32x4_t, int32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t __arm_vmulq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t __arm_vmulq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t __arm_vmulq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t __arm_vmulq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t __arm_vmulq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t __arm_vmulq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t __arm_vmulq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t __arm_vmulq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t __arm_vmulq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t __arm_vmulq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t __arm_vmulq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t __arm_vmulq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t __arm_vmulq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) 
int8x16_t __arm_vmulq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t __arm_vmulq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t __arm_vmulq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t __arm_vmulq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t __arm_vmulq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t __arm_vmulq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t __arm_vmulq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t __arm_vmulq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t __arm_vmulq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t __arm_vmulq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t __arm_vmulq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t __arm_vmulq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t __arm_vmulq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t __arm_vmulq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t __arm_vmulq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t __arm_vmulq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t __arm_vmulq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t __arm_vmulq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t __arm_vmulq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t __arm_vmulq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t __arm_vmulq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t __arm_vmulq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t __arm_vmulq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t __arm_vmulq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t __arm_vmulq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t __arm_vmulq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t __arm_vmulq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t __arm_vmulq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t __arm_vmulq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t __arm_vmulq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t __arm_vmulq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t __arm_vmvnq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t __arm_vmvnq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t __arm_vmvnq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t __arm_vmvnq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t __arm_vmvnq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t __arm_vmvnq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t __arm_vmvnq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t __arm_vmvnq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t __arm_vmvnq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t __arm_vmvnq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t __arm_vmvnq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t __arm_vmvnq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t __arm_vmvnq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t __arm_vmvnq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t __arm_vmvnq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t __arm_vmvnq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t __arm_vmvnq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t __arm_vmvnq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t __arm_vmvnq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t __arm_vmvnq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s16))) int16x8_t __arm_vmvnq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s32))) int32x4_t __arm_vmvnq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u16))) uint16x8_t __arm_vmvnq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u32))) uint32x4_t __arm_vmvnq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t __arm_vmvnq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t __arm_vmvnq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t __arm_vmvnq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t __arm_vmvnq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t __arm_vmvnq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t __arm_vmvnq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t __arm_vmvnq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t __arm_vmvnq(uint16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t __arm_vmvnq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t __arm_vmvnq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t __arm_vmvnq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t __arm_vmvnq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s16))) int16x8_t __arm_vmvnq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s32))) int32x4_t __arm_vmvnq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u16))) uint16x8_t __arm_vmvnq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u32))) uint32x4_t __arm_vmvnq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t __arm_vmvnq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t __arm_vmvnq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t __arm_vmvnq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t __arm_vmvnq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t __arm_vmvnq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t 
__arm_vmvnq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t __arm_vmvnq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t __arm_vmvnq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t __arm_vmvnq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t __arm_vmvnq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t __arm_vmvnq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t __arm_vmvnq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t __arm_vnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t __arm_vnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t __arm_vnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t __arm_vnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t __arm_vnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t __arm_vnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t __arm_vnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t __arm_vnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t __arm_vnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t __arm_vnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t __arm_vnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t __arm_vnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t __arm_vnegq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t __arm_vnegq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t __arm_vnegq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t __arm_vnegq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t __arm_vnegq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t __arm_vnegq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t __arm_vornq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t 
__arm_vornq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t __arm_vornq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t __arm_vornq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t __arm_vornq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t __arm_vornq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t __arm_vornq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t __arm_vornq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t __arm_vornq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t __arm_vornq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t __arm_vornq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t __arm_vornq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t __arm_vornq_s16(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t __arm_vornq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t __arm_vornq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t __arm_vornq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t __arm_vornq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t __arm_vornq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t __arm_vornq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t __arm_vornq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t __arm_vornq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t __arm_vornq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t __arm_vornq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t __arm_vornq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t __arm_vornq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t __arm_vornq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t __arm_vornq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t __arm_vornq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t __arm_vornq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t __arm_vornq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t __arm_vornq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t __arm_vornq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t __arm_vornq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t __arm_vornq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t __arm_vornq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t __arm_vornq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t __arm_vorrq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t __arm_vorrq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t __arm_vorrq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t __arm_vorrq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t __arm_vorrq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t __arm_vorrq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t __arm_vorrq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t __arm_vorrq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t __arm_vorrq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t __arm_vorrq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t __arm_vorrq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t __arm_vorrq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t __arm_vorrq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t __arm_vorrq_m(int8x16_t, 
int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t __arm_vorrq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t __arm_vorrq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t __arm_vorrq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t __arm_vorrq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t __arm_vorrq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t __arm_vorrq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t __arm_vorrq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t __arm_vorrq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t __arm_vorrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t __arm_vorrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t __arm_vorrq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t __arm_vorrq(uint16x8_t, uint16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t __arm_vorrq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t __arm_vorrq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t __arm_vorrq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t __arm_vorrq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t __arm_vorrq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t __arm_vorrq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t __arm_vorrq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t __arm_vorrq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t __arm_vorrq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t __arm_vorrq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t __arm_vorrq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t __arm_vorrq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t __arm_vorrq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t __arm_vorrq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t __arm_vorrq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t __arm_vorrq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t __arm_vorrq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t __arm_vorrq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t __arm_vorrq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t __arm_vorrq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t __arm_vorrq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t __arm_vorrq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t __arm_vorrq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t __arm_vorrq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t __arm_vorrq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t __arm_vorrq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpnot))) mve_pred16_t __arm_vpnot(mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t __arm_vpselq_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t __arm_vpselq(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t __arm_vpselq_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t __arm_vpselq(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t __arm_vpselq_s64(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t __arm_vpselq(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t __arm_vpselq_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t __arm_vpselq(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t __arm_vpselq_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t __arm_vpselq(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t 
__arm_vpselq_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t __arm_vpselq(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t __arm_vpselq_u64(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t __arm_vpselq(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t __arm_vpselq_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t __arm_vpselq(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t __arm_vqabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t __arm_vqabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t __arm_vqabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t __arm_vqabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t __arm_vqabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t __arm_vqabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t 
__arm_vqabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t __arm_vqabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t __arm_vqabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t __arm_vqabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t __arm_vqabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t __arm_vqabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t __arm_vqaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t __arm_vqaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t __arm_vqaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t __arm_vqaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t __arm_vqaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t __arm_vqaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t __arm_vqaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t __arm_vqaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t __arm_vqaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t __arm_vqaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t __arm_vqaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t __arm_vqaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t __arm_vqaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t __arm_vqaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t __arm_vqaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t __arm_vqaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t __arm_vqaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t __arm_vqaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t __arm_vqaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t __arm_vqaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t __arm_vqaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t __arm_vqaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t __arm_vqaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t __arm_vqaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t __arm_vqaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t __arm_vqaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t __arm_vqaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t __arm_vqaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t __arm_vqaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t __arm_vqaddq(int8x16_t, int8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t __arm_vqaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t __arm_vqaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t __arm_vqaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t __arm_vqaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t __arm_vqaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t __arm_vqaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t __arm_vqaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t __arm_vqaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t __arm_vqaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t __arm_vqaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t __arm_vqaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t __arm_vqaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t __arm_vqaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t __arm_vqaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t __arm_vqaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t __arm_vqaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t __arm_vqaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t __arm_vqaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t __arm_vqdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t __arm_vqdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t __arm_vqdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t __arm_vqdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t __arm_vqdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t __arm_vqdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t __arm_vqdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t __arm_vqdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t __arm_vqdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t __arm_vqdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t __arm_vqdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t __arm_vqdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t __arm_vqdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t __arm_vqdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t __arm_vqdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t __arm_vqdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t __arm_vqdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t __arm_vqdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t __arm_vqdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t __arm_vqdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t __arm_vqdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t __arm_vqdmladhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t __arm_vqdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t __arm_vqdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t __arm_vqdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t __arm_vqdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t __arm_vqdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t __arm_vqdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t __arm_vqdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) 
int8x16_t __arm_vqdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t __arm_vqdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t __arm_vqdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t __arm_vqdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t __arm_vqdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t __arm_vqdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t __arm_vqdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t __arm_vqdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t __arm_vqdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t __arm_vqdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t __arm_vqdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t __arm_vqdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t __arm_vqdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t __arm_vqdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t __arm_vqdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t __arm_vqdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t __arm_vqdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t __arm_vqdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t __arm_vqdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t __arm_vqdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t __arm_vqdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t __arm_vqdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t __arm_vqdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t 
__arm_vqdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t __arm_vqdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t __arm_vqdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t __arm_vqdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t __arm_vqdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t __arm_vqdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t __arm_vqdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t __arm_vqdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t __arm_vqdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t __arm_vqdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t __arm_vqdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t __arm_vqdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t __arm_vqdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t __arm_vqdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t __arm_vqdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t __arm_vqdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t __arm_vqdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t __arm_vqdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t __arm_vqdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t __arm_vqdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t __arm_vqdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t __arm_vqdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t __arm_vqdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t __arm_vqdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t __arm_vqdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t __arm_vqdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t __arm_vqdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t __arm_vqdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t __arm_vqdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t __arm_vqdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t __arm_vqdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t __arm_vqdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t __arm_vqdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t __arm_vqdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t 
__arm_vqdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t __arm_vqdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t __arm_vqdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t __arm_vqdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t __arm_vqdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t __arm_vqdmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t __arm_vqdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t __arm_vqdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t __arm_vqdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t __arm_vqdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t __arm_vqdmullbq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t __arm_vqdmullbq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t __arm_vqdmullbq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t __arm_vqdmullbq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t __arm_vqdmullbq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t __arm_vqdmullbq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t __arm_vqdmullbq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t __arm_vqdmullbq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t __arm_vqdmullbq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t __arm_vqdmullbq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t __arm_vqdmullbq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t __arm_vqdmullbq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t __arm_vqdmullbq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t __arm_vqdmullbq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t __arm_vqdmullbq_s32(int32x4_t, int32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t __arm_vqdmullbq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t __arm_vqdmulltq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t __arm_vqdmulltq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t __arm_vqdmulltq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t __arm_vqdmulltq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t __arm_vqdmulltq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t __arm_vqdmulltq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t __arm_vqdmulltq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t __arm_vqdmulltq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t __arm_vqdmulltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t __arm_vqdmulltq(int16x8_t, int16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t __arm_vqdmulltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t __arm_vqdmulltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t __arm_vqdmulltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t __arm_vqdmulltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t __arm_vqdmulltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t __arm_vqdmulltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t __arm_vqmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t __arm_vqmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t __arm_vqmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t __arm_vqmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t __arm_vqmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t __arm_vqmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t __arm_vqmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t __arm_vqmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t __arm_vqmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t __arm_vqmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t __arm_vqmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t __arm_vqmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t __arm_vqmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t __arm_vqmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t __arm_vqmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t __arm_vqmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t __arm_vqmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t __arm_vqmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) 
int16x8_t __arm_vqmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t __arm_vqmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t __arm_vqmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t __arm_vqmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t __arm_vqmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t __arm_vqmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t __arm_vqmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t __arm_vqmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t __arm_vqmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t __arm_vqmovntq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t __arm_vqmovntq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t __arm_vqmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t __arm_vqmovntq_u32(uint16x8_t, 
uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t __arm_vqmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t __arm_vqmovunbq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t __arm_vqmovunbq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t __arm_vqmovunbq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t __arm_vqmovunbq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t __arm_vqmovunbq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t __arm_vqmovunbq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t __arm_vqmovunbq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t __arm_vqmovunbq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t __arm_vqmovuntq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t __arm_vqmovuntq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t __arm_vqmovuntq_m_s32(uint16x8_t, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t __arm_vqmovuntq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t __arm_vqmovuntq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t __arm_vqmovuntq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t __arm_vqmovuntq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t __arm_vqmovuntq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t __arm_vqnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t __arm_vqnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t __arm_vqnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t __arm_vqnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t __arm_vqnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t __arm_vqnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t __arm_vqnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t __arm_vqnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t __arm_vqnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t __arm_vqnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t __arm_vqnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t __arm_vqnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t __arm_vqrdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t __arm_vqrdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t __arm_vqrdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t __arm_vqrdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t __arm_vqrdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t __arm_vqrdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t __arm_vqrdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t __arm_vqrdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t __arm_vqrdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t __arm_vqrdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t __arm_vqrdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t __arm_vqrdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t __arm_vqrdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t __arm_vqrdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t __arm_vqrdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t __arm_vqrdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t __arm_vqrdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t __arm_vqrdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t __arm_vqrdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t __arm_vqrdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t __arm_vqrdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t __arm_vqrdmladhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t __arm_vqrdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t __arm_vqrdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t __arm_vqrdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t __arm_vqrdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t __arm_vqrdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t __arm_vqrdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t __arm_vqrdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t __arm_vqrdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t __arm_vqrdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t __arm_vqrdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t __arm_vqrdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t __arm_vqrdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t __arm_vqrdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t __arm_vqrdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t __arm_vqrdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t __arm_vqrdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t __arm_vqrdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t __arm_vqrdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t 
__arm_vqrdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t __arm_vqrdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t __arm_vqrdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t __arm_vqrdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t __arm_vqrdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t __arm_vqrdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t __arm_vqrdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t __arm_vqrdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t __arm_vqrdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t __arm_vqrdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t __arm_vqrdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t __arm_vqrdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t __arm_vqrdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t __arm_vqrdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t __arm_vqrdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t __arm_vqrdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t __arm_vqrdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t __arm_vqrdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t __arm_vqrdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t __arm_vqrdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t __arm_vqrdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t __arm_vqrdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t __arm_vqrdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t __arm_vqrdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t __arm_vqrdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t __arm_vqrdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t __arm_vqrdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t __arm_vqrdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t __arm_vqrdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t __arm_vqrdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t __arm_vqrdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t __arm_vqrdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t __arm_vqrdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t __arm_vqrdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) 
int32x4_t __arm_vqrdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t __arm_vqrdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t __arm_vqrdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t __arm_vqrdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t __arm_vqrdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t __arm_vqrdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t __arm_vqrdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t __arm_vqrdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t __arm_vqrdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t __arm_vqrdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t __arm_vqrdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) 
int16x8_t __arm_vqrdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t __arm_vqrdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t __arm_vqrdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t __arm_vqrdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t __arm_vqrdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t __arm_vqrdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t __arm_vqrdmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t __arm_vqrdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t __arm_vqrdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t __arm_vqrdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t __arm_vqrdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t __arm_vqrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t __arm_vqrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t __arm_vqrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t __arm_vqrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t __arm_vqrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t __arm_vqrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t __arm_vqrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t __arm_vqrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t __arm_vqrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t __arm_vqrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t __arm_vqrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t __arm_vqrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t __arm_vqrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t 
__arm_vqrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t __arm_vqrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t __arm_vqrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t __arm_vqrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t __arm_vqrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t __arm_vqrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t __arm_vqrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t __arm_vqrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t __arm_vqrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t __arm_vqrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t __arm_vqrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t 
__arm_vqrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t __arm_vqrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t __arm_vqrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t __arm_vqrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t __arm_vqrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t __arm_vqrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t __arm_vqrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t __arm_vqrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t __arm_vqrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t __arm_vqrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t __arm_vqrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t __arm_vqrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t __arm_vqrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t 
__arm_vqrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t __arm_vqrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t __arm_vqrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t __arm_vqrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t __arm_vqrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t __arm_vqrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t __arm_vqrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t __arm_vqrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t __arm_vqrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t __arm_vqrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t __arm_vqrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t __arm_vqrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t __arm_vqrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t __arm_vqrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t __arm_vqrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t __arm_vqrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t __arm_vqrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t __arm_vqrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t __arm_vqrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t __arm_vqrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t __arm_vqrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t __arm_vqrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t __arm_vqrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t __arm_vqrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) 
uint8x16_t __arm_vqrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t __arm_vqrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t __arm_vqrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t __arm_vqrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t __arm_vqrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t __arm_vqrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t __arm_vqrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t __arm_vqrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t __arm_vqrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t __arm_vqrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t __arm_vqrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t __arm_vqrshrntq_n_s16(int8x16_t, int16x8_t, int); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t __arm_vqrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t __arm_vqrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t __arm_vqrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t __arm_vqrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t __arm_vqrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t __arm_vqrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t __arm_vqrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t __arm_vqrshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t __arm_vqrshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t __arm_vqrshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t __arm_vqrshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t 
__arm_vqrshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t __arm_vqrshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t __arm_vqrshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t __arm_vqrshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t __arm_vqrshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t __arm_vqrshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t __arm_vqrshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t __arm_vqrshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t __arm_vqrshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t __arm_vqrshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t __arm_vqrshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t __arm_vqrshruntq(uint16x8_t, int32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t __arm_vqshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t __arm_vqshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t __arm_vqshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t __arm_vqshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t __arm_vqshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t __arm_vqshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t __arm_vqshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t __arm_vqshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t __arm_vqshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t __arm_vqshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t __arm_vqshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t __arm_vqshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t __arm_vqshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t __arm_vqshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t __arm_vqshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t __arm_vqshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t __arm_vqshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t __arm_vqshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t __arm_vqshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t __arm_vqshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t __arm_vqshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t __arm_vqshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t __arm_vqshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t __arm_vqshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t __arm_vqshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t __arm_vqshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t __arm_vqshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t __arm_vqshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t __arm_vqshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t __arm_vqshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t __arm_vqshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t __arm_vqshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t __arm_vqshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t __arm_vqshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t __arm_vqshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t __arm_vqshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t __arm_vqshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t __arm_vqshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t __arm_vqshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t __arm_vqshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t __arm_vqshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t __arm_vqshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t __arm_vqshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t __arm_vqshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t __arm_vqshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t __arm_vqshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t __arm_vqshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t __arm_vqshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t __arm_vqshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t __arm_vqshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t __arm_vqshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t __arm_vqshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t __arm_vqshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t __arm_vqshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t __arm_vqshlq_r_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t __arm_vqshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t __arm_vqshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t __arm_vqshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t __arm_vqshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t __arm_vqshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) 
int16x8_t __arm_vqshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t __arm_vqshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t __arm_vqshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t __arm_vqshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t __arm_vqshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t __arm_vqshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t __arm_vqshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t __arm_vqshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t __arm_vqshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t __arm_vqshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t __arm_vqshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t __arm_vqshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t __arm_vqshluq_m_n_s16(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t 
__arm_vqshluq_m(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t __arm_vqshluq_m_n_s32(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t __arm_vqshluq_m(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t __arm_vqshluq_m_n_s8(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t __arm_vqshluq_m(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t __arm_vqshluq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t __arm_vqshluq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t __arm_vqshluq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t __arm_vqshluq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t __arm_vqshluq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t __arm_vqshluq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t __arm_vqshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t __arm_vqshrnbq_m(int8x16_t, int16x8_t, int, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t __arm_vqshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t __arm_vqshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t __arm_vqshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t __arm_vqshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t __arm_vqshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t __arm_vqshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t __arm_vqshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t __arm_vqshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t __arm_vqshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t __arm_vqshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t __arm_vqshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t __arm_vqshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t __arm_vqshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t __arm_vqshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t __arm_vqshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t __arm_vqshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t __arm_vqshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t __arm_vqshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t __arm_vqshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t __arm_vqshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t __arm_vqshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t __arm_vqshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t 
__arm_vqshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t __arm_vqshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t __arm_vqshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t __arm_vqshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t __arm_vqshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t __arm_vqshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t __arm_vqshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t __arm_vqshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t __arm_vqshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t __arm_vqshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t __arm_vqshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t __arm_vqshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t __arm_vqshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t __arm_vqshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t __arm_vqshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t __arm_vqshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t __arm_vqshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t __arm_vqshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t __arm_vqshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t __arm_vqshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t __arm_vqshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t __arm_vqshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t __arm_vqshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t 
__arm_vqshruntq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t __arm_vqsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t __arm_vqsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t __arm_vqsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t __arm_vqsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t __arm_vqsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t __arm_vqsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t __arm_vqsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t __arm_vqsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t __arm_vqsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t __arm_vqsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t __arm_vqsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t __arm_vqsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t __arm_vqsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t __arm_vqsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t __arm_vqsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t __arm_vqsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t __arm_vqsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t __arm_vqsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t __arm_vqsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t __arm_vqsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t __arm_vqsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t __arm_vqsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t __arm_vqsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t __arm_vqsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t __arm_vqsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t __arm_vqsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t __arm_vqsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t __arm_vqsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t __arm_vqsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t __arm_vqsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t __arm_vqsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t __arm_vqsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t __arm_vqsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t __arm_vqsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t __arm_vqsubq_n_u8(uint8x16_t, uint8_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t __arm_vqsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t __arm_vqsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t __arm_vqsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t __arm_vqsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t __arm_vqsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t __arm_vqsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t __arm_vqsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t __arm_vqsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t __arm_vqsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t __arm_vqsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t __arm_vqsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t __arm_vqsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t __arm_vqsubq(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t __arm_vreinterpretq_s16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t __arm_vreinterpretq_s16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t __arm_vreinterpretq_s16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t __arm_vreinterpretq_s16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t __arm_vreinterpretq_s16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t __arm_vreinterpretq_s16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t __arm_vreinterpretq_s16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t __arm_vreinterpretq_s16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t __arm_vreinterpretq_s16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t __arm_vreinterpretq_s16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t __arm_vreinterpretq_s16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t __arm_vreinterpretq_s16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) 
int16x8_t __arm_vreinterpretq_s16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t __arm_vreinterpretq_s16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t __arm_vreinterpretq_s32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t __arm_vreinterpretq_s32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t __arm_vreinterpretq_s32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t __arm_vreinterpretq_s32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t __arm_vreinterpretq_s32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t __arm_vreinterpretq_s32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t __arm_vreinterpretq_s32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t __arm_vreinterpretq_s32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t __arm_vreinterpretq_s32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t __arm_vreinterpretq_s32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t __arm_vreinterpretq_s32_u64(uint64x2_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t __arm_vreinterpretq_s32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t __arm_vreinterpretq_s32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t __arm_vreinterpretq_s32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t __arm_vreinterpretq_s64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t __arm_vreinterpretq_s64(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t __arm_vreinterpretq_s64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t __arm_vreinterpretq_s64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t __arm_vreinterpretq_s64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t __arm_vreinterpretq_s64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t __arm_vreinterpretq_s64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t __arm_vreinterpretq_s64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t __arm_vreinterpretq_s64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t __arm_vreinterpretq_s64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t __arm_vreinterpretq_s64_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t __arm_vreinterpretq_s64(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t __arm_vreinterpretq_s64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t __arm_vreinterpretq_s64(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t __arm_vreinterpretq_s8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t __arm_vreinterpretq_s8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t __arm_vreinterpretq_s8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t __arm_vreinterpretq_s8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t __arm_vreinterpretq_s8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t __arm_vreinterpretq_s8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t __arm_vreinterpretq_s8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t 
__arm_vreinterpretq_s8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t __arm_vreinterpretq_s8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t __arm_vreinterpretq_s8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t __arm_vreinterpretq_s8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t __arm_vreinterpretq_s8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t __arm_vreinterpretq_s8_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t __arm_vreinterpretq_s8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t __arm_vreinterpretq_u16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t __arm_vreinterpretq_u16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t __arm_vreinterpretq_u16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t __arm_vreinterpretq_u16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t __arm_vreinterpretq_u16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t __arm_vreinterpretq_u16(int64x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t __arm_vreinterpretq_u16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t __arm_vreinterpretq_u16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t __arm_vreinterpretq_u16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t __arm_vreinterpretq_u16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t __arm_vreinterpretq_u16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t __arm_vreinterpretq_u16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t __arm_vreinterpretq_u16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t __arm_vreinterpretq_u16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t __arm_vreinterpretq_u32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t __arm_vreinterpretq_u32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t __arm_vreinterpretq_u32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t __arm_vreinterpretq_u32(int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t __arm_vreinterpretq_u32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t __arm_vreinterpretq_u32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t __arm_vreinterpretq_u32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t __arm_vreinterpretq_u32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t __arm_vreinterpretq_u32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t __arm_vreinterpretq_u32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t __arm_vreinterpretq_u32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t __arm_vreinterpretq_u32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t __arm_vreinterpretq_u32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t __arm_vreinterpretq_u32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t __arm_vreinterpretq_u64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t __arm_vreinterpretq_u64(int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t __arm_vreinterpretq_u64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t __arm_vreinterpretq_u64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t __arm_vreinterpretq_u64_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t __arm_vreinterpretq_u64(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t __arm_vreinterpretq_u64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t __arm_vreinterpretq_u64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t __arm_vreinterpretq_u64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t __arm_vreinterpretq_u64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t __arm_vreinterpretq_u64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t __arm_vreinterpretq_u64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t __arm_vreinterpretq_u64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t __arm_vreinterpretq_u64(uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t __arm_vreinterpretq_u8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t __arm_vreinterpretq_u8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t __arm_vreinterpretq_u8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t __arm_vreinterpretq_u8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t __arm_vreinterpretq_u8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t __arm_vreinterpretq_u8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t __arm_vreinterpretq_u8_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t __arm_vreinterpretq_u8(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t __arm_vreinterpretq_u8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t __arm_vreinterpretq_u8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t __arm_vreinterpretq_u8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t __arm_vreinterpretq_u8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t 
__arm_vreinterpretq_u8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t __arm_vreinterpretq_u8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t __arm_vrev16q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t __arm_vrev16q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t __arm_vrev16q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t __arm_vrev16q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t __arm_vrev16q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t __arm_vrev16q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t __arm_vrev16q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t __arm_vrev16q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t __arm_vrev16q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t __arm_vrev16q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t __arm_vrev16q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t __arm_vrev16q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t __arm_vrev32q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t __arm_vrev32q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t __arm_vrev32q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t __arm_vrev32q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t __arm_vrev32q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t __arm_vrev32q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t __arm_vrev32q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t __arm_vrev32q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t __arm_vrev32q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t __arm_vrev32q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t __arm_vrev32q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t 
__arm_vrev32q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t __arm_vrev32q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t __arm_vrev32q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t __arm_vrev32q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t __arm_vrev32q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t __arm_vrev32q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t __arm_vrev32q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t __arm_vrev32q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t __arm_vrev32q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t __arm_vrev32q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t __arm_vrev32q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t __arm_vrev32q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t __arm_vrev32q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t __arm_vrev64q_m_s16(int16x8_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t __arm_vrev64q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t __arm_vrev64q_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t __arm_vrev64q_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t __arm_vrev64q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t __arm_vrev64q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t __arm_vrev64q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t __arm_vrev64q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t __arm_vrev64q_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t __arm_vrev64q_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t __arm_vrev64q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t __arm_vrev64q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t 
__arm_vrev64q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t __arm_vrev64q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t __arm_vrev64q_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t __arm_vrev64q(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t __arm_vrev64q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t __arm_vrev64q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t __arm_vrev64q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t __arm_vrev64q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t __arm_vrev64q_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t __arm_vrev64q(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t __arm_vrev64q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t __arm_vrev64q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t __arm_vrev64q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t __arm_vrev64q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t 
__arm_vrev64q_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t __arm_vrev64q_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t __arm_vrev64q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t __arm_vrev64q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t __arm_vrev64q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t __arm_vrev64q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t __arm_vrev64q_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t __arm_vrev64q_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t __arm_vrev64q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t __arm_vrev64q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t __arm_vrhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t __arm_vrhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t __arm_vrhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t __arm_vrhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t __arm_vrhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t __arm_vrhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t __arm_vrhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t __arm_vrhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t __arm_vrhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t __arm_vrhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t __arm_vrhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t __arm_vrhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t __arm_vrhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t __arm_vrhaddq(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t __arm_vrhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t __arm_vrhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t __arm_vrhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t __arm_vrhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t __arm_vrhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t __arm_vrhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t __arm_vrhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t __arm_vrhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t __arm_vrhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t __arm_vrhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t __arm_vrhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t __arm_vrhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t __arm_vrhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t __arm_vrhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t __arm_vrhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t __arm_vrhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t __arm_vrhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t __arm_vrhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t __arm_vrhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t __arm_vrhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t __arm_vrhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t __arm_vrhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t __arm_vrmlaldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t __arm_vrmlaldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) 
uint64_t __arm_vrmlaldavhaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t __arm_vrmlaldavhaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t __arm_vrmlaldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t __arm_vrmlaldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t __arm_vrmlaldavhaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t __arm_vrmlaldavhaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t __arm_vrmlaldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t __arm_vrmlaldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t __arm_vrmlaldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t __arm_vrmlaldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t __arm_vrmlaldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t __arm_vrmlaldavhq_p(int32x4_t, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t __arm_vrmlaldavhq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t __arm_vrmlaldavhq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t __arm_vrmlaldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t __arm_vrmlaldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t __arm_vrmlaldavhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t __arm_vrmlaldavhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t __arm_vrmlaldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t __arm_vrmlaldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t __arm_vrmlaldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t __arm_vrmlaldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t __arm_vrmlsldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t __arm_vrmlsldavhaq_p(int64_t, 
int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t __arm_vrmlsldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t __arm_vrmlsldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t __arm_vrmlsldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t __arm_vrmlsldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t __arm_vrmlsldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t __arm_vrmlsldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t __arm_vrmlsldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t __arm_vrmlsldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t __arm_vrmlsldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t __arm_vrmlsldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t __arm_vrmlsldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t __arm_vrmlsldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t __arm_vrmlsldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t __arm_vrmlsldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t __arm_vrmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t __arm_vrmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t __arm_vrmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t __arm_vrmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t __arm_vrmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t __arm_vrmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t __arm_vrmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t __arm_vrmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t 
__arm_vrmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t __arm_vrmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t __arm_vrmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t __arm_vrmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t __arm_vrmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t __arm_vrmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t __arm_vrmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t __arm_vrmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t __arm_vrmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t __arm_vrmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t __arm_vrmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t __arm_vrmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t __arm_vrmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t __arm_vrmulhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t __arm_vrmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t __arm_vrmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t __arm_vrmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t __arm_vrmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t __arm_vrmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t __arm_vrmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t __arm_vrmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t __arm_vrmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t __arm_vrmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t __arm_vrmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t __arm_vrmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t __arm_vrmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t __arm_vrmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t __arm_vrmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t __arm_vrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t __arm_vrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t __arm_vrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t __arm_vrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t __arm_vrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t __arm_vrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t __arm_vrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t __arm_vrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t __arm_vrshlq_m_n_u32(uint32x4_t, int32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t __arm_vrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t __arm_vrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t __arm_vrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t __arm_vrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t __arm_vrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t __arm_vrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t __arm_vrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t __arm_vrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t __arm_vrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t __arm_vrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t __arm_vrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t __arm_vrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t __arm_vrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t __arm_vrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t __arm_vrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t __arm_vrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t __arm_vrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t __arm_vrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t __arm_vrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t __arm_vrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t __arm_vrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t __arm_vrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t __arm_vrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t 
__arm_vrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t __arm_vrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t __arm_vrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t __arm_vrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t __arm_vrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t __arm_vrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t __arm_vrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t __arm_vrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t __arm_vrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t __arm_vrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t __arm_vrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t __arm_vrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t __arm_vrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t __arm_vrshlq(uint32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t __arm_vrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t __arm_vrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t __arm_vrshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t __arm_vrshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t __arm_vrshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t __arm_vrshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t __arm_vrshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t __arm_vrshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t __arm_vrshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t __arm_vrshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t __arm_vrshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t __arm_vrshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t __arm_vrshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t __arm_vrshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t __arm_vrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t __arm_vrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t __arm_vrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t __arm_vrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t __arm_vrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t __arm_vrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t __arm_vrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t __arm_vrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t __arm_vrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t __arm_vrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t __arm_vrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t __arm_vrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t __arm_vrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t __arm_vrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t __arm_vrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t __arm_vrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t __arm_vrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t __arm_vrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t __arm_vrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t __arm_vrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t __arm_vrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t __arm_vrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t __arm_vrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t __arm_vrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t __arm_vrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t __arm_vrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t __arm_vrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t __arm_vrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t __arm_vrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t __arm_vrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t __arm_vrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t __arm_vrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t __arm_vrshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t __arm_vrshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t __arm_vrshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t __arm_vrshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t __arm_vrshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t __arm_vrshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t __arm_vrshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t __arm_vrshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t __arm_vrshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t __arm_vrshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t __arm_vrshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t __arm_vrshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t __arm_vrshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t __arm_vrshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t __arm_vrshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t __arm_vrshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t __arm_vrshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t __arm_vrshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t __arm_vrshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t __arm_vrshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t __arm_vrshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t __arm_vrshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t __arm_vrshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t __arm_vrshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t __arm_vrshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) 
int16x8_t __arm_vrshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t __arm_vrshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t __arm_vrshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t __arm_vrshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t __arm_vrshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t __arm_vrshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t __arm_vrshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t __arm_vrshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t __arm_vrshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t __arm_vrshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t __arm_vrshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t __arm_vsbciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t 
__arm_vsbciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t __arm_vsbciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t __arm_vsbciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t __arm_vsbciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t __arm_vsbciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t __arm_vsbciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t __arm_vsbciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t __arm_vsbcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t __arm_vsbcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t __arm_vsbcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t __arm_vsbcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t __arm_vsbcq_s32(int32x4_t, 
int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t __arm_vsbcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t __arm_vsbcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t __arm_vsbcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t __arm_vsetq_lane_s16(int16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t __arm_vsetq_lane(int16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t __arm_vsetq_lane_s32(int32_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t __arm_vsetq_lane(int32_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t __arm_vsetq_lane_s64(int64_t, int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t __arm_vsetq_lane(int64_t, int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t __arm_vsetq_lane_s8(int8_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t __arm_vsetq_lane(int8_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t __arm_vsetq_lane_u16(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t __arm_vsetq_lane(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t __arm_vsetq_lane_u32(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t __arm_vsetq_lane(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t __arm_vsetq_lane_u64(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t __arm_vsetq_lane(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t __arm_vsetq_lane_u8(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t __arm_vsetq_lane(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t __arm_vshlcq_m_s16(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t __arm_vshlcq_m(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t __arm_vshlcq_m_s32(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t __arm_vshlcq_m(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t __arm_vshlcq_m_s8(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t __arm_vshlcq_m(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t __arm_vshlcq_m_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t __arm_vshlcq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t __arm_vshlcq_m_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t __arm_vshlcq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t __arm_vshlcq_m_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t __arm_vshlcq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t __arm_vshlcq_s16(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t __arm_vshlcq(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t __arm_vshlcq_s32(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t __arm_vshlcq(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t __arm_vshlcq_s8(int8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t __arm_vshlcq(int8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t __arm_vshlcq_u16(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t __arm_vshlcq(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t __arm_vshlcq_u32(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t __arm_vshlcq(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t __arm_vshlcq_u8(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t __arm_vshlcq(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t __arm_vshllbq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t __arm_vshllbq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t __arm_vshllbq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t __arm_vshllbq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t __arm_vshllbq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t __arm_vshllbq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t __arm_vshllbq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t __arm_vshllbq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t __arm_vshllbq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t __arm_vshllbq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t __arm_vshllbq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t __arm_vshllbq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t __arm_vshllbq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t __arm_vshllbq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t __arm_vshllbq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t __arm_vshllbq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t __arm_vshllbq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t __arm_vshllbq_x(int16x8_t, int, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t __arm_vshllbq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t __arm_vshllbq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t __arm_vshllbq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t __arm_vshllbq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t __arm_vshllbq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t __arm_vshllbq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t __arm_vshlltq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t __arm_vshlltq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t __arm_vshlltq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t __arm_vshlltq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t __arm_vshlltq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t 
__arm_vshlltq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t __arm_vshlltq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t __arm_vshlltq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t __arm_vshlltq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t __arm_vshlltq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t __arm_vshlltq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t __arm_vshlltq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t __arm_vshlltq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t __arm_vshlltq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t __arm_vshlltq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t __arm_vshlltq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t __arm_vshlltq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t __arm_vshlltq_x(int16x8_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t __arm_vshlltq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t __arm_vshlltq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t __arm_vshlltq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t __arm_vshlltq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t __arm_vshlltq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t __arm_vshlltq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t __arm_vshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t __arm_vshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t __arm_vshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t __arm_vshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t __arm_vshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t __arm_vshlq_m_n(int8x16_t, int8x16_t, int, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t __arm_vshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t __arm_vshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t __arm_vshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t __arm_vshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t __arm_vshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t __arm_vshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t __arm_vshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t __arm_vshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t __arm_vshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t __arm_vshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t __arm_vshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) 
int8x16_t __arm_vshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t __arm_vshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t __arm_vshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t __arm_vshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t __arm_vshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t __arm_vshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t __arm_vshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t __arm_vshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t __arm_vshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t __arm_vshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t __arm_vshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t __arm_vshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t __arm_vshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t __arm_vshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t __arm_vshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t __arm_vshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t __arm_vshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t __arm_vshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t __arm_vshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t __arm_vshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t __arm_vshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t __arm_vshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t __arm_vshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t __arm_vshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t __arm_vshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t __arm_vshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t __arm_vshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t __arm_vshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t __arm_vshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t __arm_vshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t __arm_vshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t __arm_vshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t __arm_vshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t __arm_vshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t __arm_vshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t __arm_vshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t __arm_vshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t __arm_vshlq_r_u16(uint16x8_t, int32_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t __arm_vshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t __arm_vshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t __arm_vshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t __arm_vshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t __arm_vshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t __arm_vshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t __arm_vshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t __arm_vshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t __arm_vshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t __arm_vshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t __arm_vshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t __arm_vshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t __arm_vshlq(uint16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t __arm_vshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t __arm_vshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t __arm_vshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t __arm_vshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t __arm_vshlq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t __arm_vshlq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t __arm_vshlq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t __arm_vshlq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t __arm_vshlq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t __arm_vshlq_x_n(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t __arm_vshlq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t __arm_vshlq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t 
__arm_vshlq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t __arm_vshlq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t __arm_vshlq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t __arm_vshlq_x_n(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t __arm_vshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t __arm_vshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t __arm_vshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t __arm_vshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t __arm_vshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t __arm_vshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t __arm_vshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t __arm_vshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t __arm_vshlq_x_u32(uint32x4_t, 
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t __arm_vshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t __arm_vshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t __arm_vshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t __arm_vshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t __arm_vshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t __arm_vshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t __arm_vshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t __arm_vshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t __arm_vshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t __arm_vshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t __arm_vshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t __arm_vshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t __arm_vshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t __arm_vshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t __arm_vshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t __arm_vshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t __arm_vshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t __arm_vshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t __arm_vshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t __arm_vshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t __arm_vshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t __arm_vshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t __arm_vshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t __arm_vshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t __arm_vshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t __arm_vshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t __arm_vshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t __arm_vshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t __arm_vshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t __arm_vshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t __arm_vshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t __arm_vshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t __arm_vshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t __arm_vshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t __arm_vshrntq(uint16x8_t, uint32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t __arm_vshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t __arm_vshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t __arm_vshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t __arm_vshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t __arm_vshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t __arm_vshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t __arm_vshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t __arm_vshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t __arm_vshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t __arm_vshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t __arm_vshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t 
__arm_vshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t __arm_vshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t __arm_vshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t __arm_vshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t __arm_vshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t __arm_vshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t __arm_vshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t __arm_vshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t __arm_vshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t __arm_vshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t __arm_vshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t __arm_vshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t __arm_vshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t __arm_vshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t __arm_vshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t __arm_vshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t __arm_vshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t __arm_vshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t __arm_vshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t __arm_vshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t __arm_vshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t __arm_vshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t __arm_vshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t __arm_vshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t __arm_vshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t __arm_vsliq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t __arm_vsliq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t __arm_vsliq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t __arm_vsliq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t __arm_vsliq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t __arm_vsliq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t __arm_vsliq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t __arm_vsliq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t __arm_vsliq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t __arm_vsliq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t __arm_vsliq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t __arm_vsliq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t __arm_vsliq_n_s16(int16x8_t, 
int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t __arm_vsliq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t __arm_vsliq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t __arm_vsliq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t __arm_vsliq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t __arm_vsliq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t __arm_vsliq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t __arm_vsliq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t __arm_vsliq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t __arm_vsliq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t __arm_vsliq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t __arm_vsliq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t __arm_vsriq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t __arm_vsriq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t __arm_vsriq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t __arm_vsriq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t __arm_vsriq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t __arm_vsriq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t __arm_vsriq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t __arm_vsriq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t __arm_vsriq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t __arm_vsriq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t __arm_vsriq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t __arm_vsriq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t __arm_vsriq_n_s16(int16x8_t, 
int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t __arm_vsriq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t __arm_vsriq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t __arm_vsriq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t __arm_vsriq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t __arm_vsriq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t __arm_vsriq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t __arm_vsriq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t __arm_vsriq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t __arm_vsriq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t __arm_vsriq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t __arm_vsriq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void __arm_vst1q_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void 
__arm_vst1q_p(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void __arm_vst1q_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void __arm_vst1q_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void __arm_vst1q_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void __arm_vst1q_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void __arm_vst1q_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void __arm_vst1q_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void __arm_vst1q_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void __arm_vst1q_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void __arm_vst1q_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void __arm_vst1q_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void __arm_vst1q_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void __arm_vst1q(int16_t *, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void __arm_vst1q_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void __arm_vst1q(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void __arm_vst1q_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void __arm_vst1q(int8_t *, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void __arm_vst1q_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void __arm_vst1q(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void __arm_vst1q_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void __arm_vst1q(uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void __arm_vst1q_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void __arm_vst1q(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void __arm_vst2q_s16(int16_t *, int16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void __arm_vst2q(int16_t *, int16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void __arm_vst2q_s32(int32_t *, int32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void __arm_vst2q(int32_t *, int32x4x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void __arm_vst2q_s8(int8_t *, int8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void __arm_vst2q(int8_t *, int8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void __arm_vst2q_u16(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void __arm_vst2q(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void __arm_vst2q_u32(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void __arm_vst2q(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void __arm_vst2q_u8(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void __arm_vst2q(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void __arm_vst4q_s16(int16_t *, int16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void __arm_vst4q(int16_t *, int16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void __arm_vst4q_s32(int32_t *, int32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void __arm_vst4q(int32_t *, int32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void __arm_vst4q_s8(int8_t *, int8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void __arm_vst4q(int8_t *, int8x16x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void __arm_vst4q_u16(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void __arm_vst4q(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void __arm_vst4q_u32(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void __arm_vst4q(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void __arm_vst4q_u8(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void __arm_vst4q(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void __arm_vstrbq_p_s16(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void __arm_vstrbq_p(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void __arm_vstrbq_p_s32(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void __arm_vstrbq_p(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void __arm_vstrbq_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void __arm_vstrbq_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void __arm_vstrbq_p_u16(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void __arm_vstrbq_p(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void __arm_vstrbq_p_u32(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void __arm_vstrbq_p(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void __arm_vstrbq_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void __arm_vstrbq_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void __arm_vstrbq_s16(int8_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void __arm_vstrbq(int8_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void __arm_vstrbq_s32(int8_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void __arm_vstrbq(int8_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void __arm_vstrbq_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void __arm_vstrbq(int8_t *, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void __arm_vstrbq_scatter_offset_p_s16(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void 
__arm_vstrbq_scatter_offset_p(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void __arm_vstrbq_scatter_offset_p_s32(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void __arm_vstrbq_scatter_offset_p(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void __arm_vstrbq_scatter_offset_p_s8(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void __arm_vstrbq_scatter_offset_p(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void __arm_vstrbq_scatter_offset_p_u16(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void __arm_vstrbq_scatter_offset_p(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void __arm_vstrbq_scatter_offset_p_u32(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void __arm_vstrbq_scatter_offset_p(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void __arm_vstrbq_scatter_offset_p_u8(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void __arm_vstrbq_scatter_offset_p(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void __arm_vstrbq_scatter_offset_s16(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void __arm_vstrbq_scatter_offset(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void __arm_vstrbq_scatter_offset_s32(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void __arm_vstrbq_scatter_offset(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void __arm_vstrbq_scatter_offset_s8(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void __arm_vstrbq_scatter_offset(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void __arm_vstrbq_scatter_offset_u16(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void __arm_vstrbq_scatter_offset(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void __arm_vstrbq_scatter_offset_u32(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void __arm_vstrbq_scatter_offset(uint8_t *, uint32x4_t, 
uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void __arm_vstrbq_scatter_offset_u8(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void __arm_vstrbq_scatter_offset(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void __arm_vstrbq_u16(uint8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void __arm_vstrbq(uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void __arm_vstrbq_u32(uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void __arm_vstrbq(uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void __arm_vstrbq_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void __arm_vstrbq(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void __arm_vstrdq_scatter_base_p_s64(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void __arm_vstrdq_scatter_base_p(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void __arm_vstrdq_scatter_base_p_u64(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void __arm_vstrdq_scatter_base_p(uint64x2_t, int, 
uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void __arm_vstrdq_scatter_base_s64(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void __arm_vstrdq_scatter_base(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void __arm_vstrdq_scatter_base_u64(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void __arm_vstrdq_scatter_base(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void __arm_vstrdq_scatter_base_wb_p_s64(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void __arm_vstrdq_scatter_base_wb_p(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void __arm_vstrdq_scatter_base_wb_p_u64(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void __arm_vstrdq_scatter_base_wb_p(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void __arm_vstrdq_scatter_base_wb_s64(uint64x2_t *, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void __arm_vstrdq_scatter_base_wb(uint64x2_t *, int, int64x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void __arm_vstrdq_scatter_base_wb_u64(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void __arm_vstrdq_scatter_base_wb(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void __arm_vstrdq_scatter_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void __arm_vstrdq_scatter_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void __arm_vstrdq_scatter_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void __arm_vstrdq_scatter_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void __arm_vstrdq_scatter_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void __arm_vstrdq_scatter_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void __arm_vstrdq_scatter_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void __arm_vstrdq_scatter_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void __arm_vstrdq_scatter_shifted_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void __arm_vstrdq_scatter_shifted_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void __arm_vstrdq_scatter_shifted_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void __arm_vstrdq_scatter_shifted_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void __arm_vstrdq_scatter_shifted_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void __arm_vstrdq_scatter_shifted_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void __arm_vstrdq_scatter_shifted_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void __arm_vstrdq_scatter_shifted_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void __arm_vstrhq_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void __arm_vstrhq_p(int16_t *, int16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void __arm_vstrhq_p_s32(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void __arm_vstrhq_p(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void __arm_vstrhq_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void __arm_vstrhq_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void __arm_vstrhq_p_u32(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void __arm_vstrhq_p(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void __arm_vstrhq_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void __arm_vstrhq(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void __arm_vstrhq_s32(int16_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void __arm_vstrhq(int16_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void __arm_vstrhq_scatter_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void __arm_vstrhq_scatter_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void __arm_vstrhq_scatter_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void __arm_vstrhq_scatter_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void __arm_vstrhq_scatter_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void __arm_vstrhq_scatter_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void __arm_vstrhq_scatter_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void __arm_vstrhq_scatter_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void __arm_vstrhq_scatter_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void __arm_vstrhq_scatter_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void __arm_vstrhq_scatter_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void __arm_vstrhq_scatter_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void __arm_vstrhq_scatter_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void __arm_vstrhq_scatter_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void __arm_vstrhq_scatter_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void __arm_vstrhq_scatter_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void __arm_vstrhq_scatter_shifted_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void __arm_vstrhq_scatter_shifted_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void __arm_vstrhq_scatter_shifted_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void __arm_vstrhq_scatter_shifted_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void __arm_vstrhq_scatter_shifted_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void 
__arm_vstrhq_scatter_shifted_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void __arm_vstrhq_scatter_shifted_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void __arm_vstrhq_scatter_shifted_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void __arm_vstrhq_scatter_shifted_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void __arm_vstrhq_scatter_shifted_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void __arm_vstrhq_scatter_shifted_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void __arm_vstrhq_scatter_shifted_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void __arm_vstrhq_scatter_shifted_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void __arm_vstrhq_scatter_shifted_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void __arm_vstrhq_scatter_shifted_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void __arm_vstrhq_scatter_shifted_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void __arm_vstrhq_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void __arm_vstrhq(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void __arm_vstrhq_u32(uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void __arm_vstrhq(uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void __arm_vstrwq_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void __arm_vstrwq_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void __arm_vstrwq_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void __arm_vstrwq_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void __arm_vstrwq_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void __arm_vstrwq(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void __arm_vstrwq_scatter_base_p_s32(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void 
__arm_vstrwq_scatter_base_p(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void __arm_vstrwq_scatter_base_p_u32(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void __arm_vstrwq_scatter_base_p(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void __arm_vstrwq_scatter_base_s32(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void __arm_vstrwq_scatter_base(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void __arm_vstrwq_scatter_base_u32(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void __arm_vstrwq_scatter_base(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void __arm_vstrwq_scatter_base_wb_p_s32(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void __arm_vstrwq_scatter_base_wb_p_u32(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void __arm_vstrwq_scatter_base_wb_s32(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void __arm_vstrwq_scatter_base_wb_u32(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void __arm_vstrwq_scatter_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void __arm_vstrwq_scatter_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void __arm_vstrwq_scatter_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void __arm_vstrwq_scatter_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void __arm_vstrwq_scatter_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void __arm_vstrwq_scatter_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void __arm_vstrwq_scatter_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void __arm_vstrwq_scatter_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void __arm_vstrwq_scatter_shifted_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void __arm_vstrwq_scatter_shifted_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void __arm_vstrwq_scatter_shifted_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void __arm_vstrwq_scatter_shifted_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void __arm_vstrwq_scatter_shifted_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void __arm_vstrwq_scatter_shifted_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void __arm_vstrwq_scatter_shifted_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void 
__arm_vstrwq_scatter_shifted_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void __arm_vstrwq_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void __arm_vstrwq(uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t __arm_vsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t __arm_vsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t __arm_vsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t __arm_vsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t __arm_vsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t __arm_vsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t __arm_vsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t __arm_vsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t __arm_vsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t __arm_vsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t __arm_vsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t __arm_vsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t __arm_vsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t __arm_vsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t __arm_vsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t __arm_vsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t __arm_vsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t __arm_vsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t __arm_vsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t __arm_vsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t 
__arm_vsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t __arm_vsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t __arm_vsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t __arm_vsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t __arm_vsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t __arm_vsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t __arm_vsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t __arm_vsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t __arm_vsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t __arm_vsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t __arm_vsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t __arm_vsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t __arm_vsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t __arm_vsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t __arm_vsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t __arm_vsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t __arm_vsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t __arm_vsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t __arm_vsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t __arm_vsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t __arm_vsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t __arm_vsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t __arm_vsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t __arm_vsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t __arm_vsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t __arm_vsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t __arm_vsubq_u8(uint8x16_t, 
uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t __arm_vsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t __arm_vsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t __arm_vsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t __arm_vsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t __arm_vsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t __arm_vsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t __arm_vsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t __arm_vsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t __arm_vsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t __arm_vsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t __arm_vsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t __arm_vsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t __arm_vsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t __arm_vsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t __arm_vsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t __arm_vsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t __arm_vsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t __arm_vsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t __arm_vsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t __arm_vsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t __arm_vsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t __arm_vsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t __arm_vsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t __arm_vsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t __arm_vsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s16))) int16x8_t __arm_vuninitializedq(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s32))) int32x4_t __arm_vuninitializedq(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s64))) int64x2_t __arm_vuninitializedq(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s8))) int8x16_t __arm_vuninitializedq(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u16))) uint16x8_t __arm_vuninitializedq(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u32))) uint32x4_t __arm_vuninitializedq(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u64))) uint64x2_t __arm_vuninitializedq(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u8))) uint8x16_t __arm_vuninitializedq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s16))) int16x8_t __arm_vuninitializedq_s16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s32))) int32x4_t __arm_vuninitializedq_s32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s64))) int64x2_t __arm_vuninitializedq_s64(); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s8))) int8x16_t __arm_vuninitializedq_s8(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u16))) uint16x8_t __arm_vuninitializedq_u16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u32))) uint32x4_t __arm_vuninitializedq_u32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u64))) uint64x2_t __arm_vuninitializedq_u64(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u8))) uint8x16_t __arm_vuninitializedq_u8(); #if (__ARM_FEATURE_MVE & 2) typedef __fp16 float16_t; typedef float float32_t; typedef __attribute__((__neon_vector_type__(8), __clang_arm_mve_strict_polymorphism)) float16_t float16x8_t; typedef struct { float16x8_t val[2]; } float16x8x2_t; typedef struct { float16x8_t val[4]; } float16x8x4_t; typedef __attribute__((__neon_vector_type__(4), __clang_arm_mve_strict_polymorphism)) float32_t float32x4_t; typedef struct { float32x4_t val[2]; } float32x4x2_t; typedef struct { float32x4_t val[4]; } float32x4x4_t; static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t __arm_vabdq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t __arm_vabdq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t __arm_vabdq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t __arm_vabdq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t __arm_vabdq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t __arm_vabdq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t __arm_vabdq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t __arm_vabdq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t __arm_vabdq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t __arm_vabdq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t __arm_vabdq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t __arm_vabdq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t __arm_vabsq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t __arm_vabsq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t __arm_vabsq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t __arm_vabsq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t __arm_vabsq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t __arm_vabsq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t __arm_vabsq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t __arm_vabsq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t __arm_vabsq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t __arm_vabsq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t __arm_vabsq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t __arm_vabsq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t __arm_vaddq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t __arm_vaddq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t __arm_vaddq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t __arm_vaddq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t __arm_vaddq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t 
__arm_vaddq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t __arm_vaddq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t __arm_vaddq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t __arm_vaddq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t __arm_vaddq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t __arm_vaddq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t __arm_vaddq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t __arm_vaddq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t __arm_vaddq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t __arm_vaddq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t __arm_vaddq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t __arm_vaddq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t __arm_vaddq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t __arm_vaddq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t __arm_vaddq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t __arm_vaddq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t __arm_vaddq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t __arm_vaddq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t __arm_vaddq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t __arm_vandq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t __arm_vandq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t __arm_vandq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t __arm_vandq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t __arm_vandq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t __arm_vandq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t __arm_vandq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t __arm_vandq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t __arm_vandq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t __arm_vandq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t __arm_vandq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t __arm_vandq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t __arm_vbicq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t __arm_vbicq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t __arm_vbicq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t __arm_vbicq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t __arm_vbicq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t __arm_vbicq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t __arm_vbicq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t __arm_vbicq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t __arm_vbicq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t __arm_vbicq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t __arm_vbicq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t __arm_vbicq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t __arm_vbrsrq_m_n_f16(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t __arm_vbrsrq_m(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t __arm_vbrsrq_m_n_f32(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t __arm_vbrsrq_m(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t __arm_vbrsrq_n_f16(float16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t __arm_vbrsrq(float16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t __arm_vbrsrq_n_f32(float32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t __arm_vbrsrq(float32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t __arm_vbrsrq_x_n_f16(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t __arm_vbrsrq_x(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t __arm_vbrsrq_x_n_f32(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t __arm_vbrsrq_x(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t __arm_vcaddq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t __arm_vcaddq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t __arm_vcaddq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t __arm_vcaddq_rot270(float32x4_t, float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t __arm_vcaddq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t __arm_vcaddq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t __arm_vcaddq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t __arm_vcaddq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t __arm_vcaddq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t __arm_vcaddq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t __arm_vcaddq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t __arm_vcaddq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t __arm_vcaddq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t __arm_vcaddq_rot90(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t __arm_vcaddq_rot90_f32(float32x4_t, 
float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t __arm_vcaddq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t __arm_vcaddq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t __arm_vcaddq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t __arm_vcaddq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t __arm_vcaddq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t __arm_vcaddq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t __arm_vcaddq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t __arm_vcaddq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t __arm_vcaddq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t __arm_vcmlaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t 
__arm_vcmlaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t __arm_vcmlaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t __arm_vcmlaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t __arm_vcmlaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t __arm_vcmlaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t __arm_vcmlaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t __arm_vcmlaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t __arm_vcmlaq_rot180_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t __arm_vcmlaq_rot180(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t __arm_vcmlaq_rot180_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t __arm_vcmlaq_rot180(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t 
__arm_vcmlaq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t __arm_vcmlaq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t __arm_vcmlaq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t __arm_vcmlaq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t __arm_vcmlaq_rot270_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t __arm_vcmlaq_rot270(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t __arm_vcmlaq_rot270_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t __arm_vcmlaq_rot270(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t __arm_vcmlaq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t __arm_vcmlaq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t __arm_vcmlaq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t __arm_vcmlaq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t __arm_vcmlaq_rot90_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t __arm_vcmlaq_rot90(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t __arm_vcmlaq_rot90_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t __arm_vcmlaq_rot90(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t __arm_vcmlaq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t __arm_vcmlaq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t __arm_vcmlaq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t __arm_vcmlaq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t __arm_vcmpeqq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t 
__arm_vcmpeqq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t __arm_vcmpeqq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t __arm_vcmpeqq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t __arm_vcmpeqq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t __arm_vcmpeqq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t __arm_vcmpeqq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t __arm_vcmpeqq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t __arm_vcmpeqq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t __arm_vcmpeqq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t __arm_vcmpeqq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t __arm_vcmpeqq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t __arm_vcmpeqq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t __arm_vcmpeqq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t __arm_vcmpeqq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t __arm_vcmpeqq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t __arm_vcmpgeq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t __arm_vcmpgeq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t __arm_vcmpgeq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t __arm_vcmpgeq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t __arm_vcmpgeq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t __arm_vcmpgeq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t __arm_vcmpgeq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t __arm_vcmpgeq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t __arm_vcmpgeq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t __arm_vcmpgeq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t __arm_vcmpgeq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t __arm_vcmpgeq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t __arm_vcmpgeq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t __arm_vcmpgeq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t __arm_vcmpgeq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t __arm_vcmpgeq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t __arm_vcmpgtq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t __arm_vcmpgtq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t __arm_vcmpgtq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t __arm_vcmpgtq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t __arm_vcmpgtq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t __arm_vcmpgtq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t __arm_vcmpgtq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t __arm_vcmpgtq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t __arm_vcmpgtq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t __arm_vcmpgtq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t __arm_vcmpgtq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t __arm_vcmpgtq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t __arm_vcmpgtq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t __arm_vcmpgtq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t __arm_vcmpgtq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t __arm_vcmpgtq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t __arm_vcmpleq_f16(float16x8_t, float16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t __arm_vcmpleq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t __arm_vcmpleq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t __arm_vcmpleq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t __arm_vcmpleq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t __arm_vcmpleq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t __arm_vcmpleq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t __arm_vcmpleq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t __arm_vcmpleq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t __arm_vcmpleq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t __arm_vcmpleq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t __arm_vcmpleq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t 
__arm_vcmpleq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t __arm_vcmpleq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t __arm_vcmpleq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t __arm_vcmpleq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t __arm_vcmpltq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t __arm_vcmpltq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t __arm_vcmpltq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t __arm_vcmpltq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t __arm_vcmpltq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t __arm_vcmpltq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t __arm_vcmpltq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t __arm_vcmpltq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t __arm_vcmpltq_m_n_f16(float16x8_t, 
float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t __arm_vcmpltq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t __arm_vcmpltq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t __arm_vcmpltq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t __arm_vcmpltq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t __arm_vcmpltq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t __arm_vcmpltq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t __arm_vcmpltq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t __arm_vcmpneq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t __arm_vcmpneq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t __arm_vcmpneq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t __arm_vcmpneq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t __arm_vcmpneq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t __arm_vcmpneq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t __arm_vcmpneq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t __arm_vcmpneq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t __arm_vcmpneq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t __arm_vcmpneq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t __arm_vcmpneq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t __arm_vcmpneq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t __arm_vcmpneq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t __arm_vcmpneq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t __arm_vcmpneq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t __arm_vcmpneq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t __arm_vcmulq_f16(float16x8_t, 
float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t __arm_vcmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t __arm_vcmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t __arm_vcmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t __arm_vcmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t __arm_vcmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t __arm_vcmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t __arm_vcmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t __arm_vcmulq_rot180_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t __arm_vcmulq_rot180(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t __arm_vcmulq_rot180_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t __arm_vcmulq_rot180(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) 
float16x8_t __arm_vcmulq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t __arm_vcmulq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t __arm_vcmulq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t __arm_vcmulq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t __arm_vcmulq_rot180_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t __arm_vcmulq_rot180_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t __arm_vcmulq_rot180_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t __arm_vcmulq_rot180_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t __arm_vcmulq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t __arm_vcmulq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t __arm_vcmulq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t __arm_vcmulq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t __arm_vcmulq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t __arm_vcmulq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t __arm_vcmulq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t __arm_vcmulq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t __arm_vcmulq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t __arm_vcmulq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t __arm_vcmulq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t __arm_vcmulq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t __arm_vcmulq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t __arm_vcmulq_rot90(float16x8_t, float16x8_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t __arm_vcmulq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t __arm_vcmulq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t __arm_vcmulq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t __arm_vcmulq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t __arm_vcmulq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t __arm_vcmulq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t __arm_vcmulq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t __arm_vcmulq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t __arm_vcmulq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t __arm_vcmulq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t __arm_vcmulq_x_f16(float16x8_t, 
float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t __arm_vcmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t __arm_vcmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t __arm_vcmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f16))) float16x8_t __arm_vcreateq_f16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f32))) float32x4_t __arm_vcreateq_f32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t __arm_vcvtaq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t __arm_vcvtaq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t __arm_vcvtaq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t __arm_vcvtaq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t __arm_vcvtaq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t __arm_vcvtaq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t 
__arm_vcvtaq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t __arm_vcvtaq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s16_f16))) int16x8_t __arm_vcvtaq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s32_f32))) int32x4_t __arm_vcvtaq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u16_f16))) uint16x8_t __arm_vcvtaq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u32_f32))) uint32x4_t __arm_vcvtaq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s16_f16))) int16x8_t __arm_vcvtaq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s32_f32))) int32x4_t __arm_vcvtaq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u16_f16))) uint16x8_t __arm_vcvtaq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u32_f32))) uint32x4_t __arm_vcvtaq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f16_f32))) float16x8_t __arm_vcvtbq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f32_f16))) float32x4_t __arm_vcvtbq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f16_f32))) float16x8_t __arm_vcvtbq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f32_f16))) float32x4_t __arm_vcvtbq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_x_f32_f16))) float32x4_t __arm_vcvtbq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t __arm_vcvtmq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t __arm_vcvtmq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t __arm_vcvtmq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t __arm_vcvtmq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t __arm_vcvtmq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t __arm_vcvtmq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t __arm_vcvtmq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t __arm_vcvtmq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s16_f16))) int16x8_t __arm_vcvtmq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s32_f32))) int32x4_t __arm_vcvtmq_s32_f32(float32x4_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u16_f16))) uint16x8_t __arm_vcvtmq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u32_f32))) uint32x4_t __arm_vcvtmq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s16_f16))) int16x8_t __arm_vcvtmq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s32_f32))) int32x4_t __arm_vcvtmq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u16_f16))) uint16x8_t __arm_vcvtmq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u32_f32))) uint32x4_t __arm_vcvtmq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t __arm_vcvtnq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t __arm_vcvtnq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t __arm_vcvtnq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t __arm_vcvtnq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t __arm_vcvtnq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t __arm_vcvtnq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t __arm_vcvtnq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t __arm_vcvtnq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s16_f16))) int16x8_t __arm_vcvtnq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s32_f32))) int32x4_t __arm_vcvtnq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u16_f16))) uint16x8_t __arm_vcvtnq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u32_f32))) uint32x4_t __arm_vcvtnq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s16_f16))) int16x8_t __arm_vcvtnq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s32_f32))) int32x4_t __arm_vcvtnq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u16_f16))) uint16x8_t __arm_vcvtnq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u32_f32))) uint32x4_t __arm_vcvtnq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t __arm_vcvtpq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t __arm_vcvtpq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t 
__arm_vcvtpq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t __arm_vcvtpq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t __arm_vcvtpq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t __arm_vcvtpq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t __arm_vcvtpq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t __arm_vcvtpq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s16_f16))) int16x8_t __arm_vcvtpq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s32_f32))) int32x4_t __arm_vcvtpq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u16_f16))) uint16x8_t __arm_vcvtpq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u32_f32))) uint32x4_t __arm_vcvtpq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s16_f16))) int16x8_t __arm_vcvtpq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s32_f32))) int32x4_t __arm_vcvtpq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u16_f16))) uint16x8_t __arm_vcvtpq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u32_f32))) uint32x4_t __arm_vcvtpq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t __arm_vcvtq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t __arm_vcvtq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t __arm_vcvtq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t __arm_vcvtq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t __arm_vcvtq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t __arm_vcvtq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t __arm_vcvtq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t __arm_vcvtq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t __arm_vcvtq_m_f16_s16(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t __arm_vcvtq_m(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t __arm_vcvtq_m_f16_u16(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t __arm_vcvtq_m(float16x8_t, uint16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t __arm_vcvtq_m_f32_s32(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t __arm_vcvtq_m(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t __arm_vcvtq_m_f32_u32(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t __arm_vcvtq_m(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t __arm_vcvtq_m_n_f16_s16(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t __arm_vcvtq_m_n(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t __arm_vcvtq_m_n_f16_u16(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t __arm_vcvtq_m_n(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t __arm_vcvtq_m_n_f32_s32(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t __arm_vcvtq_m_n(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t __arm_vcvtq_m_n_f32_u32(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t __arm_vcvtq_m_n(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t __arm_vcvtq_m_n_s16_f16(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t __arm_vcvtq_m_n(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t __arm_vcvtq_m_n_s32_f32(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t __arm_vcvtq_m_n(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t __arm_vcvtq_m_n_u16_f16(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t __arm_vcvtq_m_n(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t __arm_vcvtq_m_n_u32_f32(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t __arm_vcvtq_m_n(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t __arm_vcvtq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t __arm_vcvtq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t __arm_vcvtq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t __arm_vcvtq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t __arm_vcvtq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t __arm_vcvtq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t __arm_vcvtq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t __arm_vcvtq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t __arm_vcvtq_n_f16_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t __arm_vcvtq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t __arm_vcvtq_n_f16_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t __arm_vcvtq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t __arm_vcvtq_n_f32_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t __arm_vcvtq_n(int32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t __arm_vcvtq_n_f32_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t __arm_vcvtq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s16_f16))) int16x8_t __arm_vcvtq_n_s16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s32_f32))) int32x4_t __arm_vcvtq_n_s32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u16_f16))) uint16x8_t __arm_vcvtq_n_u16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u32_f32))) uint32x4_t __arm_vcvtq_n_u32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s16_f16))) int16x8_t __arm_vcvtq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s32_f32))) int32x4_t __arm_vcvtq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u16_f16))) uint16x8_t __arm_vcvtq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u32_f32))) uint32x4_t __arm_vcvtq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t __arm_vcvtq_x_f16_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t __arm_vcvtq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t __arm_vcvtq_x_f16_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t __arm_vcvtq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t __arm_vcvtq_x_f32_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t __arm_vcvtq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t __arm_vcvtq_x_f32_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t __arm_vcvtq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t __arm_vcvtq_x_n_f16_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t __arm_vcvtq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t __arm_vcvtq_x_n_f16_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t __arm_vcvtq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t __arm_vcvtq_x_n_f32_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t __arm_vcvtq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t __arm_vcvtq_x_n_f32_u32(uint32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t __arm_vcvtq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s16_f16))) int16x8_t __arm_vcvtq_x_n_s16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s32_f32))) int32x4_t __arm_vcvtq_x_n_s32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u16_f16))) uint16x8_t __arm_vcvtq_x_n_u16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u32_f32))) uint32x4_t __arm_vcvtq_x_n_u32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s16_f16))) int16x8_t __arm_vcvtq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s32_f32))) int32x4_t __arm_vcvtq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u16_f16))) uint16x8_t __arm_vcvtq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u32_f32))) uint32x4_t __arm_vcvtq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f16_f32))) float16x8_t __arm_vcvttq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f32_f16))) float32x4_t __arm_vcvttq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f16_f32))) float16x8_t __arm_vcvttq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f32_f16))) float32x4_t __arm_vcvttq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_x_f32_f16))) float32x4_t __arm_vcvttq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t __arm_vdupq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t __arm_vdupq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t __arm_vdupq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t __arm_vdupq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f16))) float16x8_t __arm_vdupq_n_f16(float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f32))) float32x4_t __arm_vdupq_n_f32(float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f16))) float16x8_t __arm_vdupq_x_n_f16(float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f32))) float32x4_t __arm_vdupq_x_n_f32(float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t __arm_veorq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t __arm_veorq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t 
__arm_veorq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t __arm_veorq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t __arm_veorq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t __arm_veorq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t __arm_veorq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t __arm_veorq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t __arm_veorq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t __arm_veorq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t __arm_veorq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t __arm_veorq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t __arm_vfmaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t __arm_vfmaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t __arm_vfmaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t __arm_vfmaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t __arm_vfmaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t __arm_vfmaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t __arm_vfmaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t __arm_vfmaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t __arm_vfmaq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t __arm_vfmaq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t __arm_vfmaq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t __arm_vfmaq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t __arm_vfmaq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t __arm_vfmaq(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t __arm_vfmaq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t __arm_vfmaq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t __arm_vfmasq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t __arm_vfmasq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t __arm_vfmasq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t __arm_vfmasq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t __arm_vfmasq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t __arm_vfmasq(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t __arm_vfmasq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t __arm_vfmasq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t __arm_vfmsq_f16(float16x8_t, 
float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t __arm_vfmsq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t __arm_vfmsq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t __arm_vfmsq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t __arm_vfmsq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t __arm_vfmsq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t __arm_vfmsq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t __arm_vfmsq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t __arm_vgetq_lane_f16(float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t __arm_vgetq_lane(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t __arm_vgetq_lane_f32(float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t __arm_vgetq_lane(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t __arm_vld1q_f16(const 
float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t __arm_vld1q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t __arm_vld1q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t __arm_vld1q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t __arm_vld1q_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t __arm_vld1q_z(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t __arm_vld1q_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t __arm_vld1q_z(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t __arm_vld2q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t __arm_vld2q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t __arm_vld2q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t __arm_vld2q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t __arm_vld4q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t __arm_vld4q(const float16_t *); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t __arm_vld4q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t __arm_vld4q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_f16))) float16x8_t __arm_vldrhq_f16(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t __arm_vldrhq_gather_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t __arm_vldrhq_gather_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t __arm_vldrhq_gather_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t __arm_vldrhq_gather_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t __arm_vldrhq_gather_shifted_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t __arm_vldrhq_gather_shifted_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t __arm_vldrhq_gather_shifted_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t 
__arm_vldrhq_gather_shifted_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_f16))) float16x8_t __arm_vldrhq_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_f32))) float32x4_t __arm_vldrwq_f32(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_f32))) float32x4_t __arm_vldrwq_gather_base_f32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_f32))) float32x4_t __arm_vldrwq_gather_base_wb_f32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_f32))) float32x4_t __arm_vldrwq_gather_base_wb_z_f32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_f32))) float32x4_t __arm_vldrwq_gather_base_z_f32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t __arm_vldrwq_gather_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t __arm_vldrwq_gather_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t __arm_vldrwq_gather_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t __arm_vldrwq_gather_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t 
__arm_vldrwq_gather_shifted_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t __arm_vldrwq_gather_shifted_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_f32))) float32x4_t __arm_vldrwq_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t __arm_vmaxnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t __arm_vmaxnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t __arm_vmaxnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t __arm_vmaxnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t __arm_vmaxnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t __arm_vmaxnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t 
__arm_vmaxnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t __arm_vmaxnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t __arm_vmaxnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t __arm_vmaxnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t __arm_vmaxnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t __arm_vmaxnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t __arm_vmaxnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t __arm_vmaxnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t __arm_vmaxnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t __arm_vmaxnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t __arm_vmaxnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t __arm_vmaxnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t 
__arm_vmaxnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t __arm_vmaxnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t __arm_vmaxnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t __arm_vmaxnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t __arm_vmaxnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t __arm_vmaxnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t __arm_vmaxnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t __arm_vmaxnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t __arm_vmaxnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t __arm_vmaxnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t __arm_vmaxnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t __arm_vmaxnmvq(float16_t, float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t __arm_vmaxnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t __arm_vmaxnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t __arm_vmaxnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t __arm_vmaxnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t __arm_vmaxnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t __arm_vmaxnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t __arm_vminnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t __arm_vminnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t __arm_vminnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t __arm_vminnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t __arm_vminnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t __arm_vminnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t __arm_vminnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t __arm_vminnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t __arm_vminnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t __arm_vminnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t __arm_vminnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t __arm_vminnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t __arm_vminnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t __arm_vminnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t __arm_vminnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t __arm_vminnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t __arm_vminnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t __arm_vminnmq(float16x8_t, float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t __arm_vminnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t __arm_vminnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t __arm_vminnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t __arm_vminnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t __arm_vminnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t __arm_vminnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t __arm_vminnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t __arm_vminnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t __arm_vminnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t __arm_vminnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t __arm_vminnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) 
float16_t __arm_vminnmvq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t __arm_vminnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t __arm_vminnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t __arm_vminnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t __arm_vminnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t __arm_vminnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t __arm_vminnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t __arm_vmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t __arm_vmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t __arm_vmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t __arm_vmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t __arm_vmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t __arm_vmulq_m(float16x8_t, float16x8_t, 
float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t __arm_vmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t __arm_vmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t __arm_vmulq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t __arm_vmulq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t __arm_vmulq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t __arm_vmulq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t __arm_vmulq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t __arm_vmulq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t __arm_vmulq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t __arm_vmulq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t __arm_vmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t __arm_vmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t __arm_vmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t __arm_vmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t __arm_vmulq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t __arm_vmulq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t __arm_vmulq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t __arm_vmulq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t __arm_vnegq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t __arm_vnegq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t __arm_vnegq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t __arm_vnegq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t __arm_vnegq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t 
__arm_vnegq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t __arm_vnegq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t __arm_vnegq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t __arm_vnegq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t __arm_vnegq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t __arm_vnegq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t __arm_vnegq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t __arm_vornq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t __arm_vornq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t __arm_vornq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t __arm_vornq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t __arm_vornq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t __arm_vornq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t __arm_vornq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t __arm_vornq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t __arm_vornq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t __arm_vornq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t __arm_vornq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t __arm_vornq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t __arm_vorrq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t __arm_vorrq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t __arm_vorrq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t __arm_vorrq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t __arm_vorrq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t __arm_vorrq_m(float16x8_t, float16x8_t, 
float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t __arm_vorrq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t __arm_vorrq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t __arm_vorrq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t __arm_vorrq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t __arm_vorrq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t __arm_vorrq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t __arm_vpselq_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t __arm_vpselq(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t __arm_vpselq_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t __arm_vpselq(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t __arm_vreinterpretq_f16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t __arm_vreinterpretq_f16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t __arm_vreinterpretq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t __arm_vreinterpretq_f16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t __arm_vreinterpretq_f16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t __arm_vreinterpretq_f16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t __arm_vreinterpretq_f16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t __arm_vreinterpretq_f16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t __arm_vreinterpretq_f16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t __arm_vreinterpretq_f16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t __arm_vreinterpretq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t __arm_vreinterpretq_f16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t __arm_vreinterpretq_f16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t __arm_vreinterpretq_f16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t __arm_vreinterpretq_f16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t __arm_vreinterpretq_f16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t __arm_vreinterpretq_f16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t __arm_vreinterpretq_f16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t __arm_vreinterpretq_f32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t __arm_vreinterpretq_f32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t __arm_vreinterpretq_f32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t __arm_vreinterpretq_f32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t __arm_vreinterpretq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t __arm_vreinterpretq_f32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t __arm_vreinterpretq_f32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t __arm_vreinterpretq_f32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t __arm_vreinterpretq_f32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t __arm_vreinterpretq_f32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t __arm_vreinterpretq_f32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t __arm_vreinterpretq_f32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t __arm_vreinterpretq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t __arm_vreinterpretq_f32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t __arm_vreinterpretq_f32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t __arm_vreinterpretq_f32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t __arm_vreinterpretq_f32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t __arm_vreinterpretq_f32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t __arm_vreinterpretq_s16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t __arm_vreinterpretq_s16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t __arm_vreinterpretq_s16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t __arm_vreinterpretq_s16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t __arm_vreinterpretq_s32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t __arm_vreinterpretq_s32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t __arm_vreinterpretq_s32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t __arm_vreinterpretq_s32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t __arm_vreinterpretq_s64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t __arm_vreinterpretq_s64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t __arm_vreinterpretq_s64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t __arm_vreinterpretq_s64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t __arm_vreinterpretq_s8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) 
int8x16_t __arm_vreinterpretq_s8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t __arm_vreinterpretq_s8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t __arm_vreinterpretq_s8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t __arm_vreinterpretq_u16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t __arm_vreinterpretq_u16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t __arm_vreinterpretq_u16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t __arm_vreinterpretq_u16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t __arm_vreinterpretq_u32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t __arm_vreinterpretq_u32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t __arm_vreinterpretq_u32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t __arm_vreinterpretq_u32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t __arm_vreinterpretq_u64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t __arm_vreinterpretq_u64(float16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t __arm_vreinterpretq_u64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t __arm_vreinterpretq_u64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t __arm_vreinterpretq_u8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t __arm_vreinterpretq_u8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t __arm_vreinterpretq_u8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t __arm_vreinterpretq_u8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t __arm_vrev32q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t __arm_vrev32q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t __arm_vrev32q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t __arm_vrev32q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t __arm_vrev32q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t __arm_vrev32q_x(float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t __arm_vrev64q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t __arm_vrev64q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t __arm_vrev64q_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t __arm_vrev64q(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t __arm_vrev64q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t __arm_vrev64q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t __arm_vrev64q_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t __arm_vrev64q_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t __arm_vrev64q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t __arm_vrev64q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t __arm_vrev64q_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t __arm_vrev64q_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t 
__arm_vrndaq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t __arm_vrndaq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t __arm_vrndaq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t __arm_vrndaq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t __arm_vrndaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t __arm_vrndaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t __arm_vrndaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t __arm_vrndaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t __arm_vrndaq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t __arm_vrndaq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t __arm_vrndaq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t __arm_vrndaq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t __arm_vrndmq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t __arm_vrndmq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t __arm_vrndmq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t __arm_vrndmq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t __arm_vrndmq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t __arm_vrndmq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t __arm_vrndmq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t __arm_vrndmq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t __arm_vrndmq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t __arm_vrndmq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t __arm_vrndmq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t __arm_vrndmq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t __arm_vrndnq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t __arm_vrndnq(float16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t __arm_vrndnq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t __arm_vrndnq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t __arm_vrndnq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t __arm_vrndnq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t __arm_vrndnq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t __arm_vrndnq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t __arm_vrndnq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t __arm_vrndnq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t __arm_vrndnq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t __arm_vrndnq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t __arm_vrndpq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t __arm_vrndpq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t 
__arm_vrndpq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t __arm_vrndpq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t __arm_vrndpq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t __arm_vrndpq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t __arm_vrndpq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t __arm_vrndpq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t __arm_vrndpq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t __arm_vrndpq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t __arm_vrndpq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t __arm_vrndpq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t __arm_vrndq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t __arm_vrndq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t __arm_vrndq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t __arm_vrndq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t __arm_vrndq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t __arm_vrndq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t __arm_vrndq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t __arm_vrndq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t __arm_vrndq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t __arm_vrndq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t __arm_vrndq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t __arm_vrndq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t __arm_vrndxq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t __arm_vrndxq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t __arm_vrndxq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t __arm_vrndxq(float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t __arm_vrndxq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t __arm_vrndxq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t __arm_vrndxq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t __arm_vrndxq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t __arm_vrndxq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t __arm_vrndxq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t __arm_vrndxq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t __arm_vrndxq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t __arm_vsetq_lane_f16(float16_t, float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t __arm_vsetq_lane(float16_t, float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t __arm_vsetq_lane_f32(float32_t, float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t __arm_vsetq_lane(float32_t, float32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void __arm_vst1q_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void __arm_vst1q(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void __arm_vst1q_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void __arm_vst1q(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void __arm_vst1q_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void __arm_vst1q_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void __arm_vst1q_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void __arm_vst1q_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void __arm_vst2q_f16(float16_t *, float16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void __arm_vst2q(float16_t *, float16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void __arm_vst2q_f32(float32_t *, float32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void __arm_vst2q(float32_t *, float32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void __arm_vst4q_f16(float16_t *, float16x8x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void __arm_vst4q(float16_t *, float16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void __arm_vst4q_f32(float32_t *, float32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void __arm_vst4q(float32_t *, float32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void __arm_vstrhq_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void __arm_vstrhq(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void __arm_vstrhq_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void __arm_vstrhq_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void __arm_vstrhq_scatter_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void __arm_vstrhq_scatter_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void __arm_vstrhq_scatter_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void __arm_vstrhq_scatter_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void __arm_vstrhq_scatter_shifted_offset_f16(float16_t *, uint16x8_t, 
float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void __arm_vstrhq_scatter_shifted_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void __arm_vstrhq_scatter_shifted_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void __arm_vstrhq_scatter_shifted_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void __arm_vstrwq_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void __arm_vstrwq(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void __arm_vstrwq_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void __arm_vstrwq_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void __arm_vstrwq_scatter_base_f32(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void __arm_vstrwq_scatter_base(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void __arm_vstrwq_scatter_base_p_f32(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void 
__arm_vstrwq_scatter_base_p(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void __arm_vstrwq_scatter_base_wb_f32(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void __arm_vstrwq_scatter_base_wb(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void __arm_vstrwq_scatter_base_wb_p_f32(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void __arm_vstrwq_scatter_base_wb_p(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void __arm_vstrwq_scatter_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void __arm_vstrwq_scatter_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void __arm_vstrwq_scatter_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void __arm_vstrwq_scatter_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void __arm_vstrwq_scatter_shifted_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void 
__arm_vstrwq_scatter_shifted_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void __arm_vstrwq_scatter_shifted_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void __arm_vstrwq_scatter_shifted_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t __arm_vsubq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t __arm_vsubq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t __arm_vsubq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t __arm_vsubq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t __arm_vsubq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t __arm_vsubq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t __arm_vsubq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t __arm_vsubq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t __arm_vsubq_m_n_f16(float16x8_t, 
float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t __arm_vsubq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t __arm_vsubq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t __arm_vsubq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t __arm_vsubq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t __arm_vsubq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t __arm_vsubq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t __arm_vsubq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t __arm_vsubq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t __arm_vsubq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t __arm_vsubq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t __arm_vsubq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t 
__arm_vsubq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t __arm_vsubq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t __arm_vsubq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t __arm_vsubq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f16))) float16x8_t __arm_vuninitializedq_f16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f32))) float32x4_t __arm_vuninitializedq_f32(); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f16))) float16x8_t __arm_vuninitializedq(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f32))) float32x4_t __arm_vuninitializedq(float32x4_t); #endif /* (__ARM_FEATURE_MVE & 2) */ #if (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_asrl))) int64_t asrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_lsll))) uint64_t lsll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshr))) int32_t sqrshr(int32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl))) int64_t sqrshrl(int64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqrshrl_sat48))) int64_t sqrshrl_sat48(int64_t, int32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshl))) int32_t sqshl(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_sqshll))) int64_t sqshll(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshr))) int32_t srshr(int32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_srshrl))) int64_t srshrl(int64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshl))) uint32_t uqrshl(uint32_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll))) uint64_t uqrshll(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqrshll_sat48))) uint64_t uqrshll_sat48(uint64_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshl))) uint32_t uqshl(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_uqshll))) uint64_t uqshll(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshr))) uint32_t urshr(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_urshrl))) uint64_t urshrl(uint64_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t vabavq_p_s16(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s16))) uint32_t vabavq_p(uint32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t vabavq_p_s32(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s32))) uint32_t vabavq_p(uint32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t vabavq_p_s8(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_s8))) uint32_t vabavq_p(uint32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t vabavq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u16))) uint32_t vabavq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t vabavq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u32))) uint32_t vabavq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t vabavq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_p_u8))) uint32_t vabavq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t vabavq_s16(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s16))) uint32_t vabavq(uint32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t vabavq_s32(uint32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s32))) uint32_t vabavq(uint32_t, int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t vabavq_s8(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_s8))) uint32_t vabavq(uint32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t vabavq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u16))) uint32_t vabavq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t vabavq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u32))) uint32_t vabavq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t vabavq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabavq_u8))) uint32_t vabavq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t vabdq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s16))) int16x8_t vabdq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t vabdq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s32))) int32x4_t vabdq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t vabdq_m_s8(int8x16_t, 
int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_s8))) int8x16_t vabdq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t vabdq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u16))) uint16x8_t vabdq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t vabdq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u32))) uint32x4_t vabdq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t vabdq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_u8))) uint8x16_t vabdq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t vabdq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s16))) int16x8_t vabdq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t vabdq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s32))) int32x4_t vabdq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t vabdq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_s8))) int8x16_t vabdq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t vabdq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u16))) uint16x8_t vabdq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t vabdq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u32))) uint32x4_t vabdq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t vabdq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_u8))) uint8x16_t vabdq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t vabdq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s16))) int16x8_t vabdq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t vabdq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s32))) int32x4_t vabdq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t vabdq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_s8))) int8x16_t vabdq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t 
vabdq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u16))) uint16x8_t vabdq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t vabdq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u32))) uint32x4_t vabdq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t vabdq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_u8))) uint8x16_t vabdq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t vabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s16))) int16x8_t vabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t vabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s32))) int32x4_t vabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t vabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_s8))) int8x16_t vabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t vabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s16))) int16x8_t vabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t vabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s32))) int32x4_t vabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t vabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_s8))) int8x16_t vabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t vabsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s16))) int16x8_t vabsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t vabsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s32))) int32x4_t vabsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t vabsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_s8))) int8x16_t vabsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t vadciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_s32))) int32x4_t vadciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t vadciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, 
unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_m_u32))) uint32x4_t vadciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t vadciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_s32))) int32x4_t vadciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t vadciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadciq_u32))) uint32x4_t vadciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t vadcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_s32))) int32x4_t vadcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t vadcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_m_u32))) uint32x4_t vadcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t vadcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_s32))) int32x4_t vadcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t 
vadcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vadcq_u32))) uint32x4_t vadcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t vaddlvaq_p_s32(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_s32))) int64_t vaddlvaq_p(int64_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t vaddlvaq_p_u32(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_p_u32))) uint64_t vaddlvaq_p(uint64_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t vaddlvaq_s32(int64_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_s32))) int64_t vaddlvaq(int64_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t vaddlvaq_u32(uint64_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvaq_u32))) uint64_t vaddlvaq(uint64_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t vaddlvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_s32))) int64_t vaddlvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t vaddlvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_p_u32))) uint64_t 
vaddlvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t vaddlvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_s32))) int64_t vaddlvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t vaddlvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddlvq_u32))) uint64_t vaddlvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t vaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s16))) int16x8_t vaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t vaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s32))) int32x4_t vaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t vaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_s8))) int8x16_t vaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t vaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u16))) uint16x8_t vaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t vaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u32))) uint32x4_t vaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t vaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_u8))) uint8x16_t vaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t vaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s16))) int16x8_t vaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t vaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s32))) int32x4_t vaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t vaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_s8))) int8x16_t vaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t vaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u16))) uint16x8_t vaddq_m(uint16x8_t, 
uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t vaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u32))) uint32x4_t vaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t vaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_u8))) uint8x16_t vaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t vaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s16))) int16x8_t vaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t vaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s32))) int32x4_t vaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t vaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_s8))) int8x16_t vaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t vaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u16))) uint16x8_t vaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t vaddq_n_u32(uint32x4_t, uint32_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u32))) uint32x4_t vaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t vaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_u8))) uint8x16_t vaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t vaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s16))) int16x8_t vaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t vaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s32))) int32x4_t vaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t vaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_s8))) int8x16_t vaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t vaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u16))) uint16x8_t vaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t vaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u32))) uint32x4_t vaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t vaddq_u8(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_u8))) uint8x16_t vaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t vaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s16))) int16x8_t vaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t vaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s32))) int32x4_t vaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t vaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_s8))) int8x16_t vaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t vaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u16))) uint16x8_t vaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t vaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u32))) uint32x4_t vaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) uint8x16_t vaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_u8))) 
uint8x16_t vaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t vaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s16))) int16x8_t vaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t vaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s32))) int32x4_t vaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t vaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_s8))) int8x16_t vaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t vaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u16))) uint16x8_t vaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t vaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u32))) uint32x4_t vaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t vaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_u8))) uint8x16_t vaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t vaddvaq_p_s16(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s16))) int32_t vaddvaq_p(int32_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t vaddvaq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s32))) int32_t vaddvaq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t vaddvaq_p_s8(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_s8))) int32_t vaddvaq_p(int32_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t vaddvaq_p_u16(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u16))) uint32_t vaddvaq_p(uint32_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t vaddvaq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u32))) uint32_t vaddvaq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t vaddvaq_p_u8(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_p_u8))) uint32_t vaddvaq_p(uint32_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t 
vaddvaq_s16(int32_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s16))) int32_t vaddvaq(int32_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t vaddvaq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s32))) int32_t vaddvaq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t vaddvaq_s8(int32_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_s8))) int32_t vaddvaq(int32_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t vaddvaq_u16(uint32_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u16))) uint32_t vaddvaq(uint32_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t vaddvaq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u32))) uint32_t vaddvaq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t vaddvaq_u8(uint32_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvaq_u8))) uint32_t vaddvaq(uint32_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t vaddvq_p_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s16))) int32_t vaddvq_p(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t 
vaddvq_p_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s32))) int32_t vaddvq_p(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t vaddvq_p_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_s8))) int32_t vaddvq_p(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t vaddvq_p_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u16))) uint32_t vaddvq_p(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t vaddvq_p_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u32))) uint32_t vaddvq_p(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t vaddvq_p_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_p_u8))) uint32_t vaddvq_p(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t vaddvq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s16))) int32_t vaddvq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t vaddvq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s32))) int32_t vaddvq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t 
vaddvq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_s8))) int32_t vaddvq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t vaddvq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u16))) uint32_t vaddvq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t vaddvq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u32))) uint32_t vaddvq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t vaddvq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddvq_u8))) uint32_t vaddvq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t vandq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s16))) int16x8_t vandq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t vandq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s32))) int32x4_t vandq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t vandq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_s8))) int8x16_t vandq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t vandq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u16))) uint16x8_t vandq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t vandq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u32))) uint32x4_t vandq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t vandq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_u8))) uint8x16_t vandq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t vandq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s16))) int16x8_t vandq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t vandq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s32))) int32x4_t vandq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t vandq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_s8))) int8x16_t vandq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t vandq_u16(uint16x8_t, uint16x8_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u16))) uint16x8_t vandq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t vandq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u32))) uint32x4_t vandq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t vandq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_u8))) uint8x16_t vandq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t vandq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s16))) int16x8_t vandq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t vandq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s32))) int32x4_t vandq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t vandq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_s8))) int8x16_t vandq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t vandq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u16))) uint16x8_t vandq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t vandq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u32))) uint32x4_t vandq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t vandq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_u8))) uint8x16_t vandq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t vbicq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s16))) int16x8_t vbicq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t vbicq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_s32))) int32x4_t vbicq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t vbicq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u16))) uint16x8_t vbicq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t vbicq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_n_u32))) uint32x4_t vbicq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t vbicq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s16))) int16x8_t vbicq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t vbicq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s32))) int32x4_t vbicq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t vbicq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_s8))) int8x16_t vbicq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t vbicq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u16))) uint16x8_t vbicq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t vbicq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u32))) uint32x4_t vbicq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t vbicq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_u8))) uint8x16_t vbicq_m(uint8x16_t, 
uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t vbicq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s16))) int16x8_t vbicq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t vbicq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_s32))) int32x4_t vbicq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t vbicq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u16))) uint16x8_t vbicq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t vbicq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_n_u32))) uint32x4_t vbicq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t vbicq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s16))) int16x8_t vbicq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t vbicq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s32))) int32x4_t vbicq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t vbicq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_s8))) int8x16_t 
vbicq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t vbicq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u16))) uint16x8_t vbicq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t vbicq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u32))) uint32x4_t vbicq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t vbicq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_u8))) uint8x16_t vbicq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t vbicq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s16))) int16x8_t vbicq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t vbicq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s32))) int32x4_t vbicq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t vbicq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_s8))) int8x16_t vbicq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t vbicq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u16))) uint16x8_t vbicq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t vbicq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u32))) uint32x4_t vbicq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t vbicq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_u8))) uint8x16_t vbicq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t vbrsrq_m_n_s16(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s16))) int16x8_t vbrsrq_m(int16x8_t, int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t vbrsrq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s32))) int32x4_t vbrsrq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t vbrsrq_m_n_s8(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_s8))) int8x16_t vbrsrq_m(int8x16_t, int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t vbrsrq_m_n_u16(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u16))) uint16x8_t vbrsrq_m(uint16x8_t, uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t vbrsrq_m_n_u32(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u32))) uint32x4_t vbrsrq_m(uint32x4_t, uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t vbrsrq_m_n_u8(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_u8))) uint8x16_t vbrsrq_m(uint8x16_t, uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t vbrsrq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s16))) int16x8_t vbrsrq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t vbrsrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s32))) int32x4_t vbrsrq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t vbrsrq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_s8))) int8x16_t vbrsrq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t vbrsrq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u16))) uint16x8_t 
vbrsrq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t vbrsrq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u32))) uint32x4_t vbrsrq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t vbrsrq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_u8))) uint8x16_t vbrsrq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t vbrsrq_x_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s16))) int16x8_t vbrsrq_x(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t vbrsrq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s32))) int32x4_t vbrsrq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t vbrsrq_x_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_s8))) int8x16_t vbrsrq_x(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t vbrsrq_x_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u16))) uint16x8_t vbrsrq_x(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) 
uint32x4_t vbrsrq_x_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u32))) uint32x4_t vbrsrq_x(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t vbrsrq_x_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_u8))) uint8x16_t vbrsrq_x(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t vcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s16))) int16x8_t vcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t vcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s32))) int32x4_t vcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t vcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_s8))) int8x16_t vcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t vcaddq_rot270_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u16))) uint16x8_t vcaddq_rot270_m(uint16x8_t, 
uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t vcaddq_rot270_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u32))) uint32x4_t vcaddq_rot270_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t vcaddq_rot270_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_u8))) uint8x16_t vcaddq_rot270_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t vcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s16))) int16x8_t vcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t vcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s32))) int32x4_t vcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t vcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_s8))) int8x16_t vcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) uint16x8_t vcaddq_rot270_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u16))) 
uint16x8_t vcaddq_rot270(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t vcaddq_rot270_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u32))) uint32x4_t vcaddq_rot270(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t vcaddq_rot270_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_u8))) uint8x16_t vcaddq_rot270(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t vcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s16))) int16x8_t vcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t vcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s32))) int32x4_t vcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t vcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_s8))) int8x16_t vcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t vcaddq_rot270_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u16))) uint16x8_t vcaddq_rot270_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t vcaddq_rot270_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u32))) uint32x4_t vcaddq_rot270_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t vcaddq_rot270_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_u8))) uint8x16_t vcaddq_rot270_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t vcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s16))) int16x8_t vcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t vcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s32))) int32x4_t vcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t vcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_s8))) int8x16_t vcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t vcaddq_rot90_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u16))) uint16x8_t vcaddq_rot90_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t vcaddq_rot90_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u32))) uint32x4_t vcaddq_rot90_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t vcaddq_rot90_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_u8))) uint8x16_t vcaddq_rot90_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t vcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s16))) int16x8_t vcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t vcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s32))) int32x4_t vcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t vcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_s8))) int8x16_t 
vcaddq_rot90(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t vcaddq_rot90_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u16))) uint16x8_t vcaddq_rot90(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t vcaddq_rot90_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u32))) uint32x4_t vcaddq_rot90(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t vcaddq_rot90_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_u8))) uint8x16_t vcaddq_rot90(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t vcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s16))) int16x8_t vcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t vcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s32))) int32x4_t vcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t vcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_s8))) int8x16_t vcaddq_rot90_x(int8x16_t, int8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t vcaddq_rot90_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u16))) uint16x8_t vcaddq_rot90_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t vcaddq_rot90_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u32))) uint32x4_t vcaddq_rot90_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t vcaddq_rot90_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_u8))) uint8x16_t vcaddq_rot90_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t vclsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s16))) int16x8_t vclsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t vclsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s32))) int32x4_t vclsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t vclsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_m_s8))) int8x16_t vclsq_m(int8x16_t, int8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t vclsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s16))) int16x8_t vclsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t vclsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s32))) int32x4_t vclsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t vclsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_s8))) int8x16_t vclsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t vclsq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s16))) int16x8_t vclsq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t vclsq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s32))) int32x4_t vclsq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t vclsq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclsq_x_s8))) int8x16_t vclsq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t vclzq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s16))) int16x8_t vclzq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t vclzq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s32))) int32x4_t vclzq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t vclzq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_s8))) int8x16_t vclzq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t vclzq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u16))) uint16x8_t vclzq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t vclzq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u32))) uint32x4_t vclzq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t vclzq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_m_u8))) uint8x16_t vclzq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t vclzq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s16))) int16x8_t vclzq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t vclzq_s32(int32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s32))) int32x4_t vclzq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t vclzq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_s8))) int8x16_t vclzq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t vclzq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u16))) uint16x8_t vclzq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t vclzq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u32))) uint32x4_t vclzq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t vclzq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_u8))) uint8x16_t vclzq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t vclzq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s16))) int16x8_t vclzq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t vclzq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s32))) int32x4_t vclzq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) int8x16_t vclzq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_s8))) 
int8x16_t vclzq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t vclzq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u16))) uint16x8_t vclzq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t vclzq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u32))) uint32x4_t vclzq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t vclzq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vclzq_x_u8))) uint8x16_t vclzq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t vcmpcsq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u16))) mve_pred16_t vcmpcsq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t vcmpcsq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u32))) mve_pred16_t vcmpcsq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t vcmpcsq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_n_u8))) mve_pred16_t vcmpcsq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t vcmpcsq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u16))) mve_pred16_t vcmpcsq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t vcmpcsq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u32))) mve_pred16_t vcmpcsq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t vcmpcsq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_m_u8))) mve_pred16_t vcmpcsq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t vcmpcsq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u16))) mve_pred16_t vcmpcsq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t vcmpcsq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u32))) mve_pred16_t vcmpcsq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t vcmpcsq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_n_u8))) mve_pred16_t vcmpcsq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t 
vcmpcsq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u16))) mve_pred16_t vcmpcsq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t vcmpcsq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u32))) mve_pred16_t vcmpcsq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t vcmpcsq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpcsq_u8))) mve_pred16_t vcmpcsq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t vcmpeqq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s16))) mve_pred16_t vcmpeqq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t vcmpeqq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s32))) mve_pred16_t vcmpeqq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t vcmpeqq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_s8))) mve_pred16_t vcmpeqq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t vcmpeqq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u16))) mve_pred16_t vcmpeqq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t vcmpeqq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u32))) mve_pred16_t vcmpeqq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t vcmpeqq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_u8))) mve_pred16_t vcmpeqq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t vcmpeqq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s16))) mve_pred16_t vcmpeqq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t vcmpeqq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s32))) mve_pred16_t vcmpeqq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t vcmpeqq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_s8))) mve_pred16_t vcmpeqq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t vcmpeqq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u16))) mve_pred16_t vcmpeqq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t vcmpeqq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u32))) mve_pred16_t vcmpeqq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t vcmpeqq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_u8))) mve_pred16_t vcmpeqq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t vcmpeqq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s16))) mve_pred16_t vcmpeqq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t vcmpeqq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s32))) mve_pred16_t vcmpeqq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t vcmpeqq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_s8))) mve_pred16_t vcmpeqq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t vcmpeqq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u16))) mve_pred16_t vcmpeqq(uint16x8_t, uint16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t vcmpeqq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u32))) mve_pred16_t vcmpeqq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t vcmpeqq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_u8))) mve_pred16_t vcmpeqq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t vcmpeqq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s16))) mve_pred16_t vcmpeqq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t vcmpeqq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s32))) mve_pred16_t vcmpeqq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t vcmpeqq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_s8))) mve_pred16_t vcmpeqq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t vcmpeqq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u16))) mve_pred16_t vcmpeqq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t vcmpeqq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u32))) mve_pred16_t vcmpeqq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t vcmpeqq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_u8))) mve_pred16_t vcmpeqq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t vcmpgeq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s16))) mve_pred16_t vcmpgeq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t vcmpgeq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s32))) mve_pred16_t vcmpgeq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t vcmpgeq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_s8))) mve_pred16_t vcmpgeq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t vcmpgeq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s16))) mve_pred16_t vcmpgeq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) mve_pred16_t vcmpgeq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s32))) 
mve_pred16_t vcmpgeq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t vcmpgeq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_s8))) mve_pred16_t vcmpgeq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t vcmpgeq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s16))) mve_pred16_t vcmpgeq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t vcmpgeq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s32))) mve_pred16_t vcmpgeq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t vcmpgeq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_s8))) mve_pred16_t vcmpgeq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t vcmpgeq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s16))) mve_pred16_t vcmpgeq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t vcmpgeq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s32))) mve_pred16_t vcmpgeq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t vcmpgeq_s8(int8x16_t, int8x16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_s8))) mve_pred16_t vcmpgeq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t vcmpgtq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s16))) mve_pred16_t vcmpgtq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t vcmpgtq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s32))) mve_pred16_t vcmpgtq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t vcmpgtq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_s8))) mve_pred16_t vcmpgtq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t vcmpgtq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s16))) mve_pred16_t vcmpgtq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t vcmpgtq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s32))) mve_pred16_t vcmpgtq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t vcmpgtq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_s8))) mve_pred16_t vcmpgtq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t vcmpgtq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s16))) mve_pred16_t vcmpgtq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t vcmpgtq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s32))) mve_pred16_t vcmpgtq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t vcmpgtq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_s8))) mve_pred16_t vcmpgtq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t vcmpgtq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s16))) mve_pred16_t vcmpgtq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t vcmpgtq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s32))) mve_pred16_t vcmpgtq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t vcmpgtq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_s8))) mve_pred16_t vcmpgtq(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t vcmphiq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u16))) mve_pred16_t vcmphiq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t vcmphiq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u32))) mve_pred16_t vcmphiq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t vcmphiq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_n_u8))) mve_pred16_t vcmphiq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t vcmphiq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u16))) mve_pred16_t vcmphiq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t vcmphiq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u32))) mve_pred16_t vcmphiq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t vcmphiq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_m_u8))) mve_pred16_t vcmphiq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t vcmphiq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u16))) mve_pred16_t vcmphiq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t vcmphiq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u32))) mve_pred16_t vcmphiq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t vcmphiq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_n_u8))) mve_pred16_t vcmphiq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t vcmphiq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u16))) mve_pred16_t vcmphiq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t vcmphiq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u32))) mve_pred16_t vcmphiq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t vcmphiq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmphiq_u8))) mve_pred16_t vcmphiq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t vcmpleq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s16))) mve_pred16_t vcmpleq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t vcmpleq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s32))) mve_pred16_t vcmpleq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t vcmpleq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_s8))) mve_pred16_t vcmpleq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t vcmpleq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s16))) mve_pred16_t vcmpleq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t vcmpleq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s32))) mve_pred16_t vcmpleq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t vcmpleq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_s8))) mve_pred16_t vcmpleq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t vcmpleq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s16))) mve_pred16_t vcmpleq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t vcmpleq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s32))) mve_pred16_t vcmpleq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t vcmpleq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_s8))) mve_pred16_t vcmpleq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t vcmpleq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s16))) mve_pred16_t vcmpleq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t vcmpleq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s32))) mve_pred16_t vcmpleq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t vcmpleq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_s8))) mve_pred16_t vcmpleq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t vcmpltq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s16))) mve_pred16_t vcmpltq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) 
mve_pred16_t vcmpltq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s32))) mve_pred16_t vcmpltq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t vcmpltq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_s8))) mve_pred16_t vcmpltq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t vcmpltq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s16))) mve_pred16_t vcmpltq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t vcmpltq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s32))) mve_pred16_t vcmpltq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t vcmpltq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_s8))) mve_pred16_t vcmpltq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t vcmpltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s16))) mve_pred16_t vcmpltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t vcmpltq_n_s32(int32x4_t, int32_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s32))) mve_pred16_t vcmpltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t vcmpltq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_s8))) mve_pred16_t vcmpltq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t vcmpltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s16))) mve_pred16_t vcmpltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t vcmpltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s32))) mve_pred16_t vcmpltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t vcmpltq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_s8))) mve_pred16_t vcmpltq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t vcmpneq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s16))) mve_pred16_t vcmpneq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t vcmpneq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s32))) mve_pred16_t vcmpneq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t vcmpneq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_s8))) mve_pred16_t vcmpneq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t vcmpneq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u16))) mve_pred16_t vcmpneq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t vcmpneq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u32))) mve_pred16_t vcmpneq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t vcmpneq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_u8))) mve_pred16_t vcmpneq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t vcmpneq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s16))) mve_pred16_t vcmpneq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t vcmpneq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s32))) mve_pred16_t vcmpneq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t vcmpneq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_s8))) mve_pred16_t vcmpneq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t vcmpneq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u16))) mve_pred16_t vcmpneq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t vcmpneq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u32))) mve_pred16_t vcmpneq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t vcmpneq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_u8))) mve_pred16_t vcmpneq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t vcmpneq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s16))) mve_pred16_t vcmpneq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t vcmpneq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s32))) mve_pred16_t vcmpneq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) 
mve_pred16_t vcmpneq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_s8))) mve_pred16_t vcmpneq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t vcmpneq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u16))) mve_pred16_t vcmpneq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t vcmpneq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u32))) mve_pred16_t vcmpneq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t vcmpneq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_u8))) mve_pred16_t vcmpneq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t vcmpneq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s16))) mve_pred16_t vcmpneq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t vcmpneq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s32))) mve_pred16_t vcmpneq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t vcmpneq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_s8))) mve_pred16_t vcmpneq(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t vcmpneq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u16))) mve_pred16_t vcmpneq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t vcmpneq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u32))) mve_pred16_t vcmpneq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t vcmpneq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_u8))) mve_pred16_t vcmpneq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s16))) int16x8_t vcreateq_s16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s32))) int32x4_t vcreateq_s32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s64))) int64x2_t vcreateq_s64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_s8))) int8x16_t vcreateq_s8(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u16))) uint16x8_t vcreateq_u16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u32))) uint32x4_t vcreateq_u32(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u64))) uint64x2_t vcreateq_u64(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_u8))) uint8x16_t vcreateq_u8(uint64_t, uint64_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q))) mve_pred16_t vctp16q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp16q_m))) mve_pred16_t vctp16q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q))) mve_pred16_t vctp32q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp32q_m))) mve_pred16_t vctp32q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q))) mve_pred16_t vctp64q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp64q_m))) mve_pred16_t vctp64q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q))) mve_pred16_t vctp8q(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vctp8q_m))) mve_pred16_t vctp8q_m(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t vddupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u16))) uint16x8_t vddupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t vddupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u32))) uint32x4_t vddupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t vddupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_n_u8))) uint8x16_t vddupq_m(uint8x16_t, 
uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t vddupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u16))) uint16x8_t vddupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t vddupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u32))) uint32x4_t vddupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t vddupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_m_wb_u8))) uint8x16_t vddupq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t vddupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u16))) uint16x8_t vddupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t vddupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u32))) uint32x4_t vddupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t vddupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_n_u8))) uint8x16_t vddupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) 
uint16x8_t vddupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u16))) uint16x8_t vddupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t vddupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u32))) uint32x4_t vddupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t vddupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_wb_u8))) uint8x16_t vddupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t vddupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u16))) uint16x8_t vddupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t vddupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u32))) uint32x4_t vddupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t vddupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_n_u8))) uint8x16_t vddupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t vddupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u16))) uint16x8_t 
vddupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t vddupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u32))) uint32x4_t vddupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t vddupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vddupq_x_wb_u8))) uint8x16_t vddupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t vdupq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s16))) int16x8_t vdupq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t vdupq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s32))) int32x4_t vdupq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t vdupq_m_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_s8))) int8x16_t vdupq_m(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t vdupq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u16))) uint16x8_t vdupq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t vdupq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u32))) uint32x4_t vdupq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t vdupq_m_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_u8))) uint8x16_t vdupq_m(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s16))) int16x8_t vdupq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s32))) int32x4_t vdupq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_s8))) int8x16_t vdupq_n_s8(int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u16))) uint16x8_t vdupq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u32))) uint32x4_t vdupq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_u8))) uint8x16_t vdupq_n_u8(uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s16))) int16x8_t vdupq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s32))) int32x4_t vdupq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_s8))) int8x16_t vdupq_x_n_s8(int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u16))) uint16x8_t vdupq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u32))) uint32x4_t vdupq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_u8))) uint8x16_t vdupq_x_n_u8(uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t vdwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u16))) uint16x8_t vdwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t vdwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u32))) uint32x4_t vdwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t vdwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_n_u8))) uint8x16_t vdwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t vdwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u16))) uint16x8_t vdwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t vdwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u32))) uint32x4_t vdwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t vdwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_m_wb_u8))) uint8x16_t vdwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t vdwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u16))) uint16x8_t vdwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t vdwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u32))) uint32x4_t vdwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t vdwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_n_u8))) uint8x16_t vdwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t vdwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u16))) uint16x8_t vdwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) uint32x4_t vdwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u32))) 
uint32x4_t vdwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t vdwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_wb_u8))) uint8x16_t vdwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t vdwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u16))) uint16x8_t vdwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t vdwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u32))) uint32x4_t vdwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t vdwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_n_u8))) uint8x16_t vdwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t vdwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u16))) uint16x8_t vdwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t vdwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u32))) uint32x4_t vdwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t vdwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdwdupq_x_wb_u8))) uint8x16_t vdwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t veorq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s16))) int16x8_t veorq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t veorq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s32))) int32x4_t veorq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t veorq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_s8))) int8x16_t veorq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t veorq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u16))) uint16x8_t veorq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t veorq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u32))) uint32x4_t veorq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t veorq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_u8))) uint8x16_t veorq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t veorq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s16))) int16x8_t veorq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t veorq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s32))) int32x4_t veorq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t veorq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_s8))) int8x16_t veorq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t veorq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u16))) uint16x8_t veorq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t veorq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u32))) uint32x4_t veorq(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t veorq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_u8))) uint8x16_t veorq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t veorq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s16))) int16x8_t veorq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t veorq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s32))) int32x4_t veorq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t veorq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_s8))) int8x16_t veorq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t veorq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u16))) uint16x8_t veorq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t veorq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u32))) uint32x4_t veorq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t veorq_x_u8(uint8x16_t, uint8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_u8))) uint8x16_t veorq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t vgetq_lane_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s16))) int16_t vgetq_lane(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t vgetq_lane_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s32))) int32_t vgetq_lane(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t vgetq_lane_s64(int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s64))) int64_t vgetq_lane(int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t vgetq_lane_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_s8))) int8_t vgetq_lane(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t vgetq_lane_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u16))) uint16_t vgetq_lane(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t vgetq_lane_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u32))) uint32_t vgetq_lane(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t 
vgetq_lane_u64(uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u64))) uint64_t vgetq_lane(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t vgetq_lane_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_u8))) uint8_t vgetq_lane(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t vhaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s16))) int16x8_t vhaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t vhaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s32))) int32x4_t vhaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t vhaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_s8))) int8x16_t vhaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t vhaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u16))) uint16x8_t vhaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t vhaddq_m_n_u32(uint32x4_t, uint32x4_t, 
uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u32))) uint32x4_t vhaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t vhaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_n_u8))) uint8x16_t vhaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t vhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s16))) int16x8_t vhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t vhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s32))) int32x4_t vhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t vhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_s8))) int8x16_t vhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t vhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u16))) uint16x8_t vhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t vhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u32))) uint32x4_t vhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t vhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_m_u8))) uint8x16_t vhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t vhaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s16))) int16x8_t vhaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t vhaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s32))) int32x4_t vhaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t vhaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_s8))) int8x16_t vhaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t vhaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u16))) uint16x8_t vhaddq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t vhaddq_n_u32(uint32x4_t, uint32_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u32))) uint32x4_t vhaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t vhaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_n_u8))) uint8x16_t vhaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t vhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s16))) int16x8_t vhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t vhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s32))) int32x4_t vhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t vhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_s8))) int8x16_t vhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t vhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u16))) uint16x8_t vhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t vhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u32))) uint32x4_t vhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t vhaddq_u8(uint8x16_t, uint8x16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_u8))) uint8x16_t vhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t vhaddq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s16))) int16x8_t vhaddq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t vhaddq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s32))) int32x4_t vhaddq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t vhaddq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_s8))) int8x16_t vhaddq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t vhaddq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u16))) uint16x8_t vhaddq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t vhaddq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u32))) uint32x4_t vhaddq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t vhaddq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_n_u8))) uint8x16_t vhaddq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t vhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s16))) int16x8_t vhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t vhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s32))) int32x4_t vhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t vhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_s8))) int8x16_t vhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t vhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u16))) uint16x8_t vhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t vhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u32))) uint32x4_t vhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) uint8x16_t vhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhaddq_x_u8))) 
uint8x16_t vhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t vhcaddq_rot270_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s16))) int16x8_t vhcaddq_rot270_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t vhcaddq_rot270_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s32))) int32x4_t vhcaddq_rot270_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t vhcaddq_rot270_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_m_s8))) int8x16_t vhcaddq_rot270_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t vhcaddq_rot270_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s16))) int16x8_t vhcaddq_rot270(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t vhcaddq_rot270_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s32))) int32x4_t vhcaddq_rot270(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t vhcaddq_rot270_s8(int8x16_t, int8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_s8))) int8x16_t vhcaddq_rot270(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t vhcaddq_rot270_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s16))) int16x8_t vhcaddq_rot270_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t vhcaddq_rot270_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s32))) int32x4_t vhcaddq_rot270_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t vhcaddq_rot270_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot270_x_s8))) int8x16_t vhcaddq_rot270_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t vhcaddq_rot90_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s16))) int16x8_t vhcaddq_rot90_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t vhcaddq_rot90_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s32))) int32x4_t vhcaddq_rot90_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t vhcaddq_rot90_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_m_s8))) int8x16_t vhcaddq_rot90_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t vhcaddq_rot90_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s16))) int16x8_t vhcaddq_rot90(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t vhcaddq_rot90_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s32))) int32x4_t vhcaddq_rot90(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t vhcaddq_rot90_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_s8))) int8x16_t vhcaddq_rot90(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t vhcaddq_rot90_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s16))) int16x8_t vhcaddq_rot90_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t vhcaddq_rot90_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s32))) int32x4_t vhcaddq_rot90_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t vhcaddq_rot90_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhcaddq_rot90_x_s8))) int8x16_t vhcaddq_rot90_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t vhsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s16))) int16x8_t vhsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t vhsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s32))) int32x4_t vhsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t vhsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_s8))) int8x16_t vhsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t vhsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u16))) uint16x8_t vhsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t vhsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u32))) uint32x4_t 
vhsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t vhsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_n_u8))) uint8x16_t vhsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t vhsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s16))) int16x8_t vhsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t vhsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s32))) int32x4_t vhsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t vhsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_s8))) int8x16_t vhsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t vhsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u16))) uint16x8_t vhsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t vhsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u32))) uint32x4_t vhsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t vhsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_m_u8))) uint8x16_t vhsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t vhsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s16))) int16x8_t vhsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t vhsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s32))) int32x4_t vhsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t vhsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_s8))) int8x16_t vhsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t vhsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u16))) uint16x8_t vhsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t vhsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u32))) uint32x4_t vhsubq(uint32x4_t, uint32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t vhsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_n_u8))) uint8x16_t vhsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t vhsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s16))) int16x8_t vhsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t vhsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s32))) int32x4_t vhsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t vhsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_s8))) int8x16_t vhsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t vhsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u16))) uint16x8_t vhsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t vhsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u32))) uint32x4_t vhsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t vhsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_u8))) uint8x16_t vhsubq(uint8x16_t, uint8x16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t vhsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s16))) int16x8_t vhsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t vhsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s32))) int32x4_t vhsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t vhsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_s8))) int8x16_t vhsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t vhsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u16))) uint16x8_t vhsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t vhsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u32))) uint32x4_t vhsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t vhsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_n_u8))) uint8x16_t vhsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t vhsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s16))) int16x8_t vhsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t vhsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s32))) int32x4_t vhsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t vhsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_s8))) int8x16_t vhsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t vhsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u16))) uint16x8_t vhsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t vhsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u32))) uint32x4_t vhsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t vhsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vhsubq_x_u8))) uint8x16_t vhsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t vidupq_m_n_u16(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u16))) uint16x8_t vidupq_m(uint16x8_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t vidupq_m_n_u32(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u32))) uint32x4_t vidupq_m(uint32x4_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t vidupq_m_n_u8(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_n_u8))) uint8x16_t vidupq_m(uint8x16_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t vidupq_m_wb_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u16))) uint16x8_t vidupq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t vidupq_m_wb_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u32))) uint32x4_t vidupq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t vidupq_m_wb_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_m_wb_u8))) uint8x16_t vidupq_m(uint8x16_t, 
uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t vidupq_n_u16(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u16))) uint16x8_t vidupq_u16(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t vidupq_n_u32(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u32))) uint32x4_t vidupq_u32(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t vidupq_n_u8(uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_n_u8))) uint8x16_t vidupq_u8(uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t vidupq_wb_u16(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u16))) uint16x8_t vidupq_u16(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t vidupq_wb_u32(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u32))) uint32x4_t vidupq_u32(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t vidupq_wb_u8(uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_wb_u8))) uint8x16_t vidupq_u8(uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t vidupq_x_n_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u16))) uint16x8_t vidupq_x_u16(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t vidupq_x_n_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u32))) uint32x4_t vidupq_x_u32(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t vidupq_x_n_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_n_u8))) uint8x16_t vidupq_x_u8(uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t vidupq_x_wb_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u16))) uint16x8_t vidupq_x_u16(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t vidupq_x_wb_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u32))) uint32x4_t vidupq_x_u32(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t vidupq_x_wb_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vidupq_x_wb_u8))) uint8x16_t vidupq_x_u8(uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t viwdupq_m_n_u16(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u16))) uint16x8_t viwdupq_m(uint16x8_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t viwdupq_m_n_u32(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u32))) uint32x4_t viwdupq_m(uint32x4_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t viwdupq_m_n_u8(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_n_u8))) uint8x16_t viwdupq_m(uint8x16_t, uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t viwdupq_m_wb_u16(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u16))) uint16x8_t viwdupq_m(uint16x8_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t viwdupq_m_wb_u32(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u32))) uint32x4_t viwdupq_m(uint32x4_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t viwdupq_m_wb_u8(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_m_wb_u8))) uint8x16_t viwdupq_m(uint8x16_t, uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t viwdupq_n_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u16))) uint16x8_t viwdupq_u16(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t viwdupq_n_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u32))) uint32x4_t viwdupq_u32(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t viwdupq_n_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_n_u8))) uint8x16_t viwdupq_u8(uint32_t, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t viwdupq_wb_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u16))) uint16x8_t viwdupq_u16(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t viwdupq_wb_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u32))) uint32x4_t viwdupq_u32(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t viwdupq_wb_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_wb_u8))) uint8x16_t viwdupq_u8(uint32_t *, uint32_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t viwdupq_x_n_u16(uint32_t, uint32_t, int, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u16))) uint16x8_t viwdupq_x_u16(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t viwdupq_x_n_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u32))) uint32x4_t viwdupq_x_u32(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t viwdupq_x_n_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_n_u8))) uint8x16_t viwdupq_x_u8(uint32_t, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t viwdupq_x_wb_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u16))) uint16x8_t viwdupq_x_u16(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t viwdupq_x_wb_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u32))) uint32x4_t viwdupq_x_u32(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t viwdupq_x_wb_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_viwdupq_x_wb_u8))) uint8x16_t viwdupq_x_u8(uint32_t *, uint32_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) 
int16x8_t vld1q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s16))) int16x8_t vld1q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t vld1q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s32))) int32x4_t vld1q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t vld1q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_s8))) int8x16_t vld1q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t vld1q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u16))) uint16x8_t vld1q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t vld1q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u32))) uint32x4_t vld1q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t vld1q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_u8))) uint8x16_t vld1q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t vld1q_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s16))) int16x8_t vld1q_z(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t vld1q_z_s32(const int32_t *, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s32))) int32x4_t vld1q_z(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t vld1q_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_s8))) int8x16_t vld1q_z(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t vld1q_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u16))) uint16x8_t vld1q_z(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t vld1q_z_u32(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u32))) uint32x4_t vld1q_z(const uint32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t vld1q_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_u8))) uint8x16_t vld1q_z(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t vld2q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s16))) int16x8x2_t vld2q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t vld2q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s32))) int32x4x2_t vld2q(const int32_t *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t vld2q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_s8))) int8x16x2_t vld2q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t vld2q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u16))) uint16x8x2_t vld2q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t vld2q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u32))) uint32x4x2_t vld2q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t vld2q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_u8))) uint8x16x2_t vld2q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t vld4q_s16(const int16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s16))) int16x8x4_t vld4q(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t vld4q_s32(const int32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s32))) int32x4x4_t vld4q(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t vld4q_s8(const int8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_s8))) int8x16x4_t vld4q(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t 
vld4q_u16(const uint16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u16))) uint16x8x4_t vld4q(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t vld4q_u32(const uint32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u32))) uint32x4x4_t vld4q(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t vld4q_u8(const uint8_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_u8))) uint8x16x4_t vld4q(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t vldrbq_gather_offset_s16(const int8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s16))) int16x8_t vldrbq_gather_offset(const int8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t vldrbq_gather_offset_s32(const int8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s32))) int32x4_t vldrbq_gather_offset(const int8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t vldrbq_gather_offset_s8(const int8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_s8))) int8x16_t vldrbq_gather_offset(const int8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t vldrbq_gather_offset_u16(const uint8_t *, uint16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u16))) uint16x8_t vldrbq_gather_offset(const uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t vldrbq_gather_offset_u32(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u32))) uint32x4_t vldrbq_gather_offset(const uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t vldrbq_gather_offset_u8(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_u8))) uint8x16_t vldrbq_gather_offset(const uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t vldrbq_gather_offset_z_s16(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s16))) int16x8_t vldrbq_gather_offset_z(const int8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t vldrbq_gather_offset_z_s32(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s32))) int32x4_t vldrbq_gather_offset_z(const int8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t vldrbq_gather_offset_z_s8(const int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_s8))) int8x16_t vldrbq_gather_offset_z(const 
int8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t vldrbq_gather_offset_z_u16(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u16))) uint16x8_t vldrbq_gather_offset_z(const uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t vldrbq_gather_offset_z_u32(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u32))) uint32x4_t vldrbq_gather_offset_z(const uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t vldrbq_gather_offset_z_u8(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_gather_offset_z_u8))) uint8x16_t vldrbq_gather_offset_z(const uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s16))) int16x8_t vldrbq_s16(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s32))) int32x4_t vldrbq_s32(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_s8))) int8x16_t vldrbq_s8(const int8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u16))) uint16x8_t vldrbq_u16(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u32))) uint32x4_t vldrbq_u32(const uint8_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_u8))) uint8x16_t vldrbq_u8(const uint8_t *); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s16))) int16x8_t vldrbq_z_s16(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s32))) int32x4_t vldrbq_z_s32(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_s8))) int8x16_t vldrbq_z_s8(const int8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u16))) uint16x8_t vldrbq_z_u16(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u32))) uint32x4_t vldrbq_z_u32(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrbq_z_u8))) uint8x16_t vldrbq_z_u8(const uint8_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_s64))) int64x2_t vldrdq_gather_base_s64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_u64))) uint64x2_t vldrdq_gather_base_u64(uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_s64))) int64x2_t vldrdq_gather_base_wb_s64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_u64))) uint64x2_t vldrdq_gather_base_wb_u64(uint64x2_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_s64))) int64x2_t vldrdq_gather_base_wb_z_s64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_wb_z_u64))) uint64x2_t vldrdq_gather_base_wb_z_u64(uint64x2_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_s64))) int64x2_t 
vldrdq_gather_base_z_s64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_base_z_u64))) uint64x2_t vldrdq_gather_base_z_u64(uint64x2_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t vldrdq_gather_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_s64))) int64x2_t vldrdq_gather_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t vldrdq_gather_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_u64))) uint64x2_t vldrdq_gather_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t vldrdq_gather_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_s64))) int64x2_t vldrdq_gather_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t vldrdq_gather_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_offset_z_u64))) uint64x2_t vldrdq_gather_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t vldrdq_gather_shifted_offset_s64(const int64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_s64))) int64x2_t vldrdq_gather_shifted_offset(const int64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t vldrdq_gather_shifted_offset_u64(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_u64))) uint64x2_t vldrdq_gather_shifted_offset(const uint64_t *, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t vldrdq_gather_shifted_offset_z_s64(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_s64))) int64x2_t vldrdq_gather_shifted_offset_z(const int64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t vldrdq_gather_shifted_offset_z_u64(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrdq_gather_shifted_offset_z_u64))) uint64x2_t vldrdq_gather_shifted_offset_z(const uint64_t *, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t vldrhq_gather_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s16))) int16x8_t vldrhq_gather_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t vldrhq_gather_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_s32))) int32x4_t vldrhq_gather_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t vldrhq_gather_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u16))) uint16x8_t vldrhq_gather_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t vldrhq_gather_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_u32))) uint32x4_t vldrhq_gather_offset(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t vldrhq_gather_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s16))) int16x8_t vldrhq_gather_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t vldrhq_gather_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_s32))) int32x4_t vldrhq_gather_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t vldrhq_gather_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u16))) uint16x8_t vldrhq_gather_offset_z(const uint16_t *, 
uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t vldrhq_gather_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_u32))) uint32x4_t vldrhq_gather_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t vldrhq_gather_shifted_offset_s16(const int16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s16))) int16x8_t vldrhq_gather_shifted_offset(const int16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t vldrhq_gather_shifted_offset_s32(const int16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_s32))) int32x4_t vldrhq_gather_shifted_offset(const int16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t vldrhq_gather_shifted_offset_u16(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u16))) uint16x8_t vldrhq_gather_shifted_offset(const uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t vldrhq_gather_shifted_offset_u32(const uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_u32))) uint32x4_t vldrhq_gather_shifted_offset(const uint16_t *, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t vldrhq_gather_shifted_offset_z_s16(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s16))) int16x8_t vldrhq_gather_shifted_offset_z(const int16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t vldrhq_gather_shifted_offset_z_s32(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_s32))) int32x4_t vldrhq_gather_shifted_offset_z(const int16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t vldrhq_gather_shifted_offset_z_u16(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u16))) uint16x8_t vldrhq_gather_shifted_offset_z(const uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t vldrhq_gather_shifted_offset_z_u32(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_u32))) uint32x4_t vldrhq_gather_shifted_offset_z(const uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s16))) int16x8_t vldrhq_s16(const int16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_s32))) int32x4_t vldrhq_s32(const int16_t *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u16))) uint16x8_t vldrhq_u16(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_u32))) uint32x4_t vldrhq_u32(const uint16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s16))) int16x8_t vldrhq_z_s16(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_s32))) int32x4_t vldrhq_z_s32(const int16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u16))) uint16x8_t vldrhq_z_u16(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_u32))) uint32x4_t vldrhq_z_u32(const uint16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_s32))) int32x4_t vldrwq_gather_base_s32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_u32))) uint32x4_t vldrwq_gather_base_u32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_s32))) int32x4_t vldrwq_gather_base_wb_s32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_u32))) uint32x4_t vldrwq_gather_base_wb_u32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_s32))) int32x4_t vldrwq_gather_base_wb_z_s32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_u32))) uint32x4_t vldrwq_gather_base_wb_z_u32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_s32))) int32x4_t vldrwq_gather_base_z_s32(uint32x4_t, int, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_u32))) uint32x4_t vldrwq_gather_base_z_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t vldrwq_gather_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_s32))) int32x4_t vldrwq_gather_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t vldrwq_gather_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_u32))) uint32x4_t vldrwq_gather_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t vldrwq_gather_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_s32))) int32x4_t vldrwq_gather_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t vldrwq_gather_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_u32))) uint32x4_t vldrwq_gather_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t vldrwq_gather_shifted_offset_s32(const int32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_s32))) int32x4_t vldrwq_gather_shifted_offset(const int32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t vldrwq_gather_shifted_offset_u32(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_u32))) uint32x4_t vldrwq_gather_shifted_offset(const uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t vldrwq_gather_shifted_offset_z_s32(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_s32))) int32x4_t vldrwq_gather_shifted_offset_z(const int32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t vldrwq_gather_shifted_offset_z_u32(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_u32))) uint32x4_t vldrwq_gather_shifted_offset_z(const uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_s32))) int32x4_t vldrwq_s32(const int32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_u32))) uint32x4_t vldrwq_u32(const uint32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_s32))) int32x4_t vldrwq_z_s32(const int32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_u32))) uint32x4_t vldrwq_z_u32(const uint32_t *, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t vmaxaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s16))) uint16x8_t vmaxaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t vmaxaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s32))) uint32x4_t vmaxaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t vmaxaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_m_s8))) uint8x16_t vmaxaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t vmaxaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s16))) uint16x8_t vmaxaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t vmaxaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s32))) uint32x4_t vmaxaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t vmaxaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxaq_s8))) uint8x16_t vmaxaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t vmaxavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s16))) uint16_t vmaxavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t vmaxavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s32))) uint32_t vmaxavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t vmaxavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_p_s8))) uint8_t vmaxavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t vmaxavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s16))) uint16_t vmaxavq(uint16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t vmaxavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s32))) uint32_t vmaxavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t vmaxavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxavq_s8))) uint8_t vmaxavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t vmaxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s16))) int16x8_t vmaxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t vmaxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s32))) int32x4_t vmaxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t vmaxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_s8))) int8x16_t vmaxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t vmaxq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u16))) uint16x8_t vmaxq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t vmaxq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u32))) uint32x4_t vmaxq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t vmaxq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_m_u8))) uint8x16_t vmaxq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t vmaxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s16))) int16x8_t vmaxq(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t vmaxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s32))) int32x4_t vmaxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t vmaxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_s8))) int8x16_t vmaxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t vmaxq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u16))) uint16x8_t vmaxq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t vmaxq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u32))) uint32x4_t vmaxq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t vmaxq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_u8))) uint8x16_t vmaxq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t vmaxq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s16))) int16x8_t vmaxq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t vmaxq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s32))) int32x4_t vmaxq_x(int32x4_t, 
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t vmaxq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_s8))) int8x16_t vmaxq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t vmaxq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u16))) uint16x8_t vmaxq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t vmaxq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u32))) uint32x4_t vmaxq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t vmaxq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxq_x_u8))) uint8x16_t vmaxq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t vmaxvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s16))) int16_t vmaxvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t vmaxvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s32))) int32_t vmaxvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t vmaxvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_s8))) int8_t vmaxvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t vmaxvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u16))) uint16_t vmaxvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t vmaxvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u32))) uint32_t vmaxvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t vmaxvq_p_u8(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_p_u8))) uint8_t vmaxvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t vmaxvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s16))) int16_t vmaxvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t vmaxvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s32))) int32_t vmaxvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t vmaxvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_s8))) int8_t vmaxvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t vmaxvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u16))) uint16_t vmaxvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t vmaxvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u32))) uint32_t vmaxvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t vmaxvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxvq_u8))) uint8_t vmaxvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t vminaq_m_s16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s16))) uint16x8_t vminaq_m(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t vminaq_m_s32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s32))) uint32x4_t vminaq_m(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t vminaq_m_s8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_m_s8))) uint8x16_t vminaq_m(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) 
uint16x8_t vminaq_s16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s16))) uint16x8_t vminaq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t vminaq_s32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s32))) uint32x4_t vminaq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t vminaq_s8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminaq_s8))) uint8x16_t vminaq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t vminavq_p_s16(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s16))) uint16_t vminavq_p(uint16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t vminavq_p_s32(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s32))) uint32_t vminavq_p(uint32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t vminavq_p_s8(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_p_s8))) uint8_t vminavq_p(uint8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t vminavq_s16(uint16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s16))) uint16_t vminavq(uint16_t, 
int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t vminavq_s32(uint32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s32))) uint32_t vminavq(uint32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t vminavq_s8(uint8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminavq_s8))) uint8_t vminavq(uint8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t vminq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s16))) int16x8_t vminq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t vminq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s32))) int32x4_t vminq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t vminq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_s8))) int8x16_t vminq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t vminq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u16))) uint16x8_t vminq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t vminq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u32))) uint32x4_t vminq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t vminq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_m_u8))) uint8x16_t vminq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t vminq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s16))) int16x8_t vminq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t vminq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s32))) int32x4_t vminq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t vminq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_s8))) int8x16_t vminq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t vminq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u16))) uint16x8_t vminq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t vminq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u32))) uint32x4_t vminq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t vminq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_u8))) uint8x16_t vminq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t vminq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s16))) int16x8_t vminq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t vminq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s32))) int32x4_t vminq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t vminq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_s8))) int8x16_t vminq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t vminq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u16))) uint16x8_t vminq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t vminq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u32))) uint32x4_t vminq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t vminq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminq_x_u8))) uint8x16_t vminq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t vminvq_p_s16(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s16))) int16_t vminvq_p(int16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t vminvq_p_s32(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s32))) int32_t vminvq_p(int32_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t vminvq_p_s8(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_s8))) int8_t vminvq_p(int8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t vminvq_p_u16(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u16))) uint16_t vminvq_p(uint16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t vminvq_p_u32(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u32))) uint32_t vminvq_p(uint32_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t vminvq_p_u8(uint8_t, uint8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_p_u8))) uint8_t vminvq_p(uint8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t vminvq_s16(int16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s16))) int16_t vminvq(int16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t vminvq_s32(int32_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s32))) int32_t vminvq(int32_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t vminvq_s8(int8_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_s8))) int8_t vminvq(int8_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t vminvq_u16(uint16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u16))) uint16_t vminvq(uint16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t vminvq_u32(uint32_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u32))) uint32_t vminvq(uint32_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t vminvq_u8(uint8_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminvq_u8))) uint8_t vminvq(uint8_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t vmladavaq_p_s16(int32_t, int16x8_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s16))) int32_t vmladavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t vmladavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s32))) int32_t vmladavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t vmladavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_s8))) int32_t vmladavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t vmladavaq_p_u16(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u16))) uint32_t vmladavaq_p(uint32_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t vmladavaq_p_u32(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u32))) uint32_t vmladavaq_p(uint32_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t vmladavaq_p_u8(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_p_u8))) uint32_t vmladavaq_p(uint32_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t vmladavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s16))) int32_t vmladavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t vmladavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s32))) int32_t vmladavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t vmladavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_s8))) int32_t vmladavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t vmladavaq_u16(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u16))) uint32_t vmladavaq(uint32_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t vmladavaq_u32(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u32))) uint32_t vmladavaq(uint32_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t vmladavaq_u8(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaq_u8))) uint32_t vmladavaq(uint32_t, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t vmladavaxq_p_s16(int32_t, 
int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s16))) int32_t vmladavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t vmladavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s32))) int32_t vmladavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t vmladavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_p_s8))) int32_t vmladavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t vmladavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s16))) int32_t vmladavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t vmladavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s32))) int32_t vmladavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t vmladavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavaxq_s8))) int32_t vmladavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t vmladavq_p_s16(int16x8_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s16))) int32_t vmladavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t vmladavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s32))) int32_t vmladavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t vmladavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_s8))) int32_t vmladavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t vmladavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u16))) uint32_t vmladavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t vmladavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u32))) uint32_t vmladavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t vmladavq_p_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_p_u8))) uint32_t vmladavq_p(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t vmladavq_s16(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s16))) int32_t vmladavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t vmladavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s32))) int32_t vmladavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t vmladavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_s8))) int32_t vmladavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t vmladavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u16))) uint32_t vmladavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t vmladavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u32))) uint32_t vmladavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t vmladavq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavq_u8))) uint32_t vmladavq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t vmladavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s16))) int32_t vmladavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t vmladavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s32))) int32_t vmladavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t vmladavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_p_s8))) int32_t vmladavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t vmladavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s16))) int32_t vmladavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t vmladavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s32))) int32_t vmladavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t vmladavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmladavxq_s8))) int32_t vmladavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t vmlaldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s16))) int64_t vmlaldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t vmlaldavaq_p_s32(int64_t, int32x4_t, 
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_s32))) int64_t vmlaldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t vmlaldavaq_p_u16(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u16))) uint64_t vmlaldavaq_p(uint64_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t vmlaldavaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_p_u32))) uint64_t vmlaldavaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t vmlaldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s16))) int64_t vmlaldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t vmlaldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_s32))) int64_t vmlaldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t vmlaldavaq_u16(uint64_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u16))) uint64_t vmlaldavaq(uint64_t, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t 
vmlaldavaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaq_u32))) uint64_t vmlaldavaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t vmlaldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s16))) int64_t vmlaldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t vmlaldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_p_s32))) int64_t vmlaldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t vmlaldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s16))) int64_t vmlaldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t vmlaldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavaxq_s32))) int64_t vmlaldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t vmlaldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s16))) int64_t vmlaldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t 
vmlaldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_s32))) int64_t vmlaldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t vmlaldavq_p_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u16))) uint64_t vmlaldavq_p(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t vmlaldavq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_p_u32))) uint64_t vmlaldavq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t vmlaldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s16))) int64_t vmlaldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t vmlaldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_s32))) int64_t vmlaldavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t vmlaldavq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u16))) uint64_t vmlaldavq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t vmlaldavq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavq_u32))) uint64_t vmlaldavq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t vmlaldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s16))) int64_t vmlaldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t vmlaldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_p_s32))) int64_t vmlaldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t vmlaldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s16))) int64_t vmlaldavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t vmlaldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaldavxq_s32))) int64_t vmlaldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t vmlaq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s16))) int16x8_t vmlaq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t vmlaq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s32))) int32x4_t 
vmlaq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t vmlaq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_s8))) int8x16_t vmlaq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t vmlaq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u16))) uint16x8_t vmlaq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t vmlaq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u32))) uint32x4_t vmlaq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t vmlaq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_m_n_u8))) uint8x16_t vmlaq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t vmlaq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s16))) int16x8_t vmlaq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t vmlaq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s32))) int32x4_t 
vmlaq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t vmlaq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_s8))) int8x16_t vmlaq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t vmlaq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u16))) uint16x8_t vmlaq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t vmlaq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u32))) uint32x4_t vmlaq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t vmlaq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlaq_n_u8))) uint8x16_t vmlaq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t vmlasq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s16))) int16x8_t vmlasq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t vmlasq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s32))) int32x4_t vmlasq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t vmlasq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_s8))) int8x16_t vmlasq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t vmlasq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u16))) uint16x8_t vmlasq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t vmlasq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u32))) uint32x4_t vmlasq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t vmlasq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_m_n_u8))) uint8x16_t vmlasq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t vmlasq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s16))) int16x8_t vmlasq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t vmlasq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s32))) int32x4_t vmlasq(int32x4_t, int32x4_t, int32_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t vmlasq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_s8))) int8x16_t vmlasq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t vmlasq_n_u16(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u16))) uint16x8_t vmlasq(uint16x8_t, uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t vmlasq_n_u32(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u32))) uint32x4_t vmlasq(uint32x4_t, uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t vmlasq_n_u8(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlasq_n_u8))) uint8x16_t vmlasq(uint8x16_t, uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t vmlsdavaq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s16))) int32_t vmlsdavaq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t vmlsdavaq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s32))) int32_t vmlsdavaq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t vmlsdavaq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_p_s8))) int32_t vmlsdavaq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t vmlsdavaq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s16))) int32_t vmlsdavaq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t vmlsdavaq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s32))) int32_t vmlsdavaq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t vmlsdavaq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaq_s8))) int32_t vmlsdavaq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t vmlsdavaxq_p_s16(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s16))) int32_t vmlsdavaxq_p(int32_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t vmlsdavaxq_p_s32(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s32))) int32_t vmlsdavaxq_p(int32_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t vmlsdavaxq_p_s8(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_p_s8))) int32_t vmlsdavaxq_p(int32_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t vmlsdavaxq_s16(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s16))) int32_t vmlsdavaxq(int32_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t vmlsdavaxq_s32(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s32))) int32_t vmlsdavaxq(int32_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t vmlsdavaxq_s8(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavaxq_s8))) int32_t vmlsdavaxq(int32_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t vmlsdavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s16))) int32_t vmlsdavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t vmlsdavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s32))) int32_t vmlsdavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t vmlsdavq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_p_s8))) int32_t vmlsdavq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t vmlsdavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s16))) int32_t vmlsdavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t vmlsdavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s32))) int32_t vmlsdavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t vmlsdavq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavq_s8))) int32_t vmlsdavq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t vmlsdavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s16))) int32_t vmlsdavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t vmlsdavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s32))) int32_t vmlsdavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t vmlsdavxq_p_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_p_s8))) int32_t vmlsdavxq_p(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t vmlsdavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s16))) int32_t vmlsdavxq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t vmlsdavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s32))) int32_t vmlsdavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t vmlsdavxq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsdavxq_s8))) int32_t vmlsdavxq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t vmlsldavaq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s16))) int64_t vmlsldavaq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t vmlsldavaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_p_s32))) int64_t vmlsldavaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t vmlsldavaq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s16))) int64_t 
vmlsldavaq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t vmlsldavaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaq_s32))) int64_t vmlsldavaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t vmlsldavaxq_p_s16(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s16))) int64_t vmlsldavaxq_p(int64_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t vmlsldavaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_p_s32))) int64_t vmlsldavaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t vmlsldavaxq_s16(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s16))) int64_t vmlsldavaxq(int64_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t vmlsldavaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavaxq_s32))) int64_t vmlsldavaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t vmlsldavq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s16))) int64_t 
vmlsldavq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t vmlsldavq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_p_s32))) int64_t vmlsldavq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t vmlsldavq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s16))) int64_t vmlsldavq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t vmlsldavq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavq_s32))) int64_t vmlsldavq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t vmlsldavxq_p_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s16))) int64_t vmlsldavxq_p(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t vmlsldavxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_p_s32))) int64_t vmlsldavxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t vmlsldavxq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s16))) int64_t vmlsldavxq(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t vmlsldavxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmlsldavxq_s32))) int64_t vmlsldavxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t vmovlbq_m_s16(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s16))) int32x4_t vmovlbq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t vmovlbq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_s8))) int16x8_t vmovlbq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t vmovlbq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u16))) uint32x4_t vmovlbq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t vmovlbq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_m_u8))) uint16x8_t vmovlbq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t vmovlbq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s16))) int32x4_t vmovlbq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t vmovlbq_s8(int8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_s8))) int16x8_t vmovlbq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t vmovlbq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u16))) uint32x4_t vmovlbq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t vmovlbq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_u8))) uint16x8_t vmovlbq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t vmovlbq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s16))) int32x4_t vmovlbq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t vmovlbq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_s8))) int16x8_t vmovlbq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t vmovlbq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u16))) uint32x4_t vmovlbq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t vmovlbq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovlbq_x_u8))) uint16x8_t vmovlbq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t vmovltq_m_s16(int32x4_t, 
int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s16))) int32x4_t vmovltq_m(int32x4_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t vmovltq_m_s8(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_s8))) int16x8_t vmovltq_m(int16x8_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t vmovltq_m_u16(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u16))) uint32x4_t vmovltq_m(uint32x4_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t vmovltq_m_u8(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_m_u8))) uint16x8_t vmovltq_m(uint16x8_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t vmovltq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s16))) int32x4_t vmovltq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t vmovltq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_s8))) int16x8_t vmovltq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t vmovltq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u16))) uint32x4_t vmovltq(uint16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t vmovltq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_u8))) uint16x8_t vmovltq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t vmovltq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s16))) int32x4_t vmovltq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t vmovltq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_s8))) int16x8_t vmovltq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t vmovltq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u16))) uint32x4_t vmovltq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t vmovltq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovltq_x_u8))) uint16x8_t vmovltq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t vmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s16))) int8x16_t vmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t vmovnbq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_s32))) int16x8_t vmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t vmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u16))) uint8x16_t vmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t vmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_m_u32))) uint16x8_t vmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t vmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s16))) int8x16_t vmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t vmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_s32))) int16x8_t vmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t vmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u16))) uint8x16_t vmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t vmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovnbq_u32))) uint16x8_t vmovnbq(uint16x8_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t vmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s16))) int8x16_t vmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t vmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_s32))) int16x8_t vmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t vmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u16))) uint8x16_t vmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t vmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_m_u32))) uint16x8_t vmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t vmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s16))) int8x16_t vmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t vmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_s32))) int16x8_t vmovntq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t vmovntq_u16(uint8x16_t, 
uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u16))) uint8x16_t vmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t vmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmovntq_u32))) uint16x8_t vmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t vmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s16))) int16x8_t vmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t vmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s32))) int32x4_t vmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t vmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_s8))) int8x16_t vmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t vmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u16))) uint16x8_t vmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t vmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u32))) uint32x4_t vmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t vmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_m_u8))) uint8x16_t vmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t vmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s16))) int16x8_t vmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t vmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s32))) int32x4_t vmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t vmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_s8))) int8x16_t vmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t vmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u16))) uint16x8_t vmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t vmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u32))) uint32x4_t vmulhq(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t vmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_u8))) uint8x16_t vmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t vmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s16))) int16x8_t vmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t vmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s32))) int32x4_t vmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t vmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_s8))) int8x16_t vmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t vmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u16))) uint16x8_t vmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t vmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u32))) uint32x4_t vmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t 
vmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulhq_x_u8))) uint8x16_t vmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t vmullbq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s16))) int32x4_t vmullbq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t vmullbq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s32))) int64x2_t vmullbq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t vmullbq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_s8))) int16x8_t vmullbq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t vmullbq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u16))) uint32x4_t vmullbq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t vmullbq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u32))) uint64x2_t 
vmullbq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t vmullbq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_m_u8))) uint16x8_t vmullbq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t vmullbq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s16))) int32x4_t vmullbq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t vmullbq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s32))) int64x2_t vmullbq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t vmullbq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_s8))) int16x8_t vmullbq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t vmullbq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u16))) uint32x4_t vmullbq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t vmullbq_int_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u32))) uint64x2_t vmullbq_int(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t vmullbq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_u8))) uint16x8_t vmullbq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t vmullbq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s16))) int32x4_t vmullbq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t vmullbq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s32))) int64x2_t vmullbq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t vmullbq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_s8))) int16x8_t vmullbq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t vmullbq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u16))) uint32x4_t vmullbq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t vmullbq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u32))) uint64x2_t vmullbq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t vmullbq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_int_x_u8))) uint16x8_t vmullbq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t vmullbq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p16))) uint32x4_t vmullbq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t vmullbq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_m_p8))) uint16x8_t vmullbq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t vmullbq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p16))) uint32x4_t vmullbq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t vmullbq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_p8))) uint16x8_t vmullbq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) uint32x4_t vmullbq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p16))) 
uint32x4_t vmullbq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t vmullbq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmullbq_poly_x_p8))) uint16x8_t vmullbq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t vmulltq_int_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s16))) int32x4_t vmulltq_int_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t vmulltq_int_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s32))) int64x2_t vmulltq_int_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t vmulltq_int_m_s8(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_s8))) int16x8_t vmulltq_int_m(int16x8_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t vmulltq_int_m_u16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u16))) uint32x4_t vmulltq_int_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t 
vmulltq_int_m_u32(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u32))) uint64x2_t vmulltq_int_m(uint64x2_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t vmulltq_int_m_u8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_m_u8))) uint16x8_t vmulltq_int_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t vmulltq_int_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s16))) int32x4_t vmulltq_int(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t vmulltq_int_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s32))) int64x2_t vmulltq_int(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t vmulltq_int_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_s8))) int16x8_t vmulltq_int(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t vmulltq_int_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u16))) uint32x4_t vmulltq_int(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t vmulltq_int_u32(uint32x4_t, uint32x4_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u32))) uint64x2_t vmulltq_int(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t vmulltq_int_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_u8))) uint16x8_t vmulltq_int(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t vmulltq_int_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s16))) int32x4_t vmulltq_int_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t vmulltq_int_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s32))) int64x2_t vmulltq_int_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t vmulltq_int_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_s8))) int16x8_t vmulltq_int_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t vmulltq_int_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u16))) uint32x4_t vmulltq_int_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t vmulltq_int_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u32))) uint64x2_t vmulltq_int_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t vmulltq_int_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_int_x_u8))) uint16x8_t vmulltq_int_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t vmulltq_poly_m_p16(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p16))) uint32x4_t vmulltq_poly_m(uint32x4_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t vmulltq_poly_m_p8(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_m_p8))) uint16x8_t vmulltq_poly_m(uint16x8_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t vmulltq_poly_p16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p16))) uint32x4_t vmulltq_poly(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t vmulltq_poly_p8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_p8))) uint16x8_t vmulltq_poly(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) 
uint32x4_t vmulltq_poly_x_p16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p16))) uint32x4_t vmulltq_poly_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t vmulltq_poly_x_p8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulltq_poly_x_p8))) uint16x8_t vmulltq_poly_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t vmulq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s16))) int16x8_t vmulq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t vmulq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s32))) int32x4_t vmulq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t vmulq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_s8))) int8x16_t vmulq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t vmulq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u16))) uint16x8_t vmulq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t vmulq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u32))) uint32x4_t vmulq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t vmulq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_u8))) uint8x16_t vmulq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t vmulq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s16))) int16x8_t vmulq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t vmulq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s32))) int32x4_t vmulq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t vmulq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_s8))) int8x16_t vmulq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t vmulq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u16))) uint16x8_t vmulq_m(uint16x8_t, 
uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t vmulq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u32))) uint32x4_t vmulq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t vmulq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_u8))) uint8x16_t vmulq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t vmulq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s16))) int16x8_t vmulq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t vmulq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s32))) int32x4_t vmulq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t vmulq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_s8))) int8x16_t vmulq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t vmulq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u16))) uint16x8_t vmulq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t vmulq_n_u32(uint32x4_t, uint32_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u32))) uint32x4_t vmulq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t vmulq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_u8))) uint8x16_t vmulq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t vmulq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s16))) int16x8_t vmulq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t vmulq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s32))) int32x4_t vmulq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t vmulq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_s8))) int8x16_t vmulq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t vmulq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u16))) uint16x8_t vmulq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t vmulq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u32))) uint32x4_t vmulq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t vmulq_u8(uint8x16_t, uint8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_u8))) uint8x16_t vmulq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t vmulq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s16))) int16x8_t vmulq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t vmulq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s32))) int32x4_t vmulq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t vmulq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_s8))) int8x16_t vmulq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t vmulq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u16))) uint16x8_t vmulq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t vmulq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u32))) uint32x4_t vmulq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) uint8x16_t vmulq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_u8))) 
uint8x16_t vmulq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t vmulq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s16))) int16x8_t vmulq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t vmulq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s32))) int32x4_t vmulq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t vmulq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_s8))) int8x16_t vmulq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t vmulq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u16))) uint16x8_t vmulq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t vmulq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u32))) uint32x4_t vmulq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t vmulq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_u8))) uint8x16_t vmulq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t vmvnq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s16))) int16x8_t vmvnq_m(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t vmvnq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_s32))) int32x4_t vmvnq_m(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t vmvnq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u16))) uint16x8_t vmvnq_m(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t vmvnq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_n_u32))) uint32x4_t vmvnq_m(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t vmvnq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s16))) int16x8_t vmvnq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t vmvnq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s32))) int32x4_t vmvnq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t 
vmvnq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_s8))) int8x16_t vmvnq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t vmvnq_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u16))) uint16x8_t vmvnq_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t vmvnq_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u32))) uint32x4_t vmvnq_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t vmvnq_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_m_u8))) uint8x16_t vmvnq_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s16))) int16x8_t vmvnq_n_s16(int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_s32))) int32x4_t vmvnq_n_s32(int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u16))) uint16x8_t vmvnq_n_u16(uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_n_u32))) uint32x4_t vmvnq_n_u32(uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t vmvnq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s16))) int16x8_t vmvnq(int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t vmvnq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s32))) int32x4_t vmvnq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t vmvnq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_s8))) int8x16_t vmvnq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t vmvnq_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u16))) uint16x8_t vmvnq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t vmvnq_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u32))) uint32x4_t vmvnq(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t vmvnq_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_u8))) uint8x16_t vmvnq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s16))) int16x8_t vmvnq_x_n_s16(int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_s32))) int32x4_t vmvnq_x_n_s32(int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u16))) uint16x8_t vmvnq_x_n_u16(uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_n_u32))) uint32x4_t vmvnq_x_n_u32(uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t vmvnq_x_s16(int16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s16))) int16x8_t vmvnq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t vmvnq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s32))) int32x4_t vmvnq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t vmvnq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_s8))) int8x16_t vmvnq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t vmvnq_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u16))) uint16x8_t vmvnq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t vmvnq_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u32))) uint32x4_t vmvnq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t vmvnq_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmvnq_x_u8))) uint8x16_t vmvnq_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t vnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s16))) int16x8_t vnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t vnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s32))) int32x4_t vnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t vnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_s8))) int8x16_t vnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t vnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s16))) int16x8_t vnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t vnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s32))) int32x4_t vnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t vnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_s8))) int8x16_t vnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t vnegq_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s16))) int16x8_t vnegq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t vnegq_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s32))) int32x4_t vnegq_x(int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t vnegq_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_s8))) int8x16_t vnegq_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t vornq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s16))) int16x8_t vornq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t vornq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s32))) int32x4_t vornq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t vornq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_s8))) int8x16_t vornq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t vornq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u16))) uint16x8_t vornq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t vornq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u32))) uint32x4_t vornq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t vornq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_u8))) uint8x16_t vornq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t vornq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s16))) int16x8_t vornq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t vornq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s32))) int32x4_t vornq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t vornq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_s8))) int8x16_t vornq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t vornq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u16))) uint16x8_t vornq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t vornq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u32))) uint32x4_t vornq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t vornq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_u8))) uint8x16_t 
vornq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t vornq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s16))) int16x8_t vornq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t vornq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s32))) int32x4_t vornq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t vornq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_s8))) int8x16_t vornq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t vornq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u16))) uint16x8_t vornq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t vornq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u32))) uint32x4_t vornq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t vornq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_u8))) uint8x16_t vornq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t vorrq_m_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s16))) int16x8_t vorrq_m_n(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t vorrq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_s32))) int32x4_t vorrq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t vorrq_m_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u16))) uint16x8_t vorrq_m_n(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t vorrq_m_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_n_u32))) uint32x4_t vorrq_m_n(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t vorrq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s16))) int16x8_t vorrq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t vorrq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s32))) int32x4_t vorrq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t vorrq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_s8))) int8x16_t vorrq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t vorrq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u16))) uint16x8_t vorrq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t vorrq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u32))) uint32x4_t vorrq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t vorrq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_u8))) uint8x16_t vorrq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t vorrq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s16))) int16x8_t vorrq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t vorrq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_s32))) int32x4_t vorrq(int32x4_t, int32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t vorrq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u16))) uint16x8_t vorrq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t vorrq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_n_u32))) uint32x4_t vorrq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t vorrq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s16))) int16x8_t vorrq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t vorrq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s32))) int32x4_t vorrq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t vorrq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_s8))) int8x16_t vorrq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t vorrq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u16))) uint16x8_t vorrq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t vorrq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u32))) uint32x4_t vorrq(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t vorrq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_u8))) uint8x16_t vorrq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t vorrq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s16))) int16x8_t vorrq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t vorrq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s32))) int32x4_t vorrq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t vorrq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_s8))) int8x16_t vorrq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t vorrq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u16))) uint16x8_t vorrq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t vorrq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u32))) uint32x4_t vorrq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t vorrq_x_u8(uint8x16_t, uint8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_u8))) uint8x16_t vorrq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpnot))) mve_pred16_t vpnot(mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t vpselq_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s16))) int16x8_t vpselq(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t vpselq_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s32))) int32x4_t vpselq(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t vpselq_s64(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s64))) int64x2_t vpselq(int64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t vpselq_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_s8))) int8x16_t vpselq(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t vpselq_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u16))) uint16x8_t vpselq(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t vpselq_u32(uint32x4_t, 
uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u32))) uint32x4_t vpselq(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t vpselq_u64(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u64))) uint64x2_t vpselq(uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t vpselq_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_u8))) uint8x16_t vpselq(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t vqabsq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s16))) int16x8_t vqabsq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t vqabsq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s32))) int32x4_t vqabsq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t vqabsq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_m_s8))) int8x16_t vqabsq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t vqabsq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s16))) int16x8_t vqabsq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t vqabsq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s32))) int32x4_t vqabsq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t vqabsq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqabsq_s8))) int8x16_t vqabsq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t vqaddq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s16))) int16x8_t vqaddq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t vqaddq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s32))) int32x4_t vqaddq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t vqaddq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_s8))) int8x16_t vqaddq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t vqaddq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u16))) uint16x8_t vqaddq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t vqaddq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u32))) uint32x4_t vqaddq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t vqaddq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_n_u8))) uint8x16_t vqaddq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t vqaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s16))) int16x8_t vqaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t vqaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s32))) int32x4_t vqaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t vqaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_s8))) int8x16_t vqaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) uint16x8_t vqaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u16))) 
uint16x8_t vqaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t vqaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u32))) uint32x4_t vqaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t vqaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_m_u8))) uint8x16_t vqaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t vqaddq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s16))) int16x8_t vqaddq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t vqaddq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s32))) int32x4_t vqaddq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t vqaddq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_s8))) int8x16_t vqaddq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t vqaddq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u16))) uint16x8_t vqaddq(uint16x8_t, uint16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t vqaddq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u32))) uint32x4_t vqaddq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t vqaddq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_n_u8))) uint8x16_t vqaddq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t vqaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s16))) int16x8_t vqaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t vqaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s32))) int32x4_t vqaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t vqaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_s8))) int8x16_t vqaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t vqaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u16))) uint16x8_t vqaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t vqaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u32))) uint32x4_t vqaddq(uint32x4_t, uint32x4_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t vqaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqaddq_u8))) uint8x16_t vqaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t vqdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s16))) int16x8_t vqdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t vqdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s32))) int32x4_t vqdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t vqdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_m_s8))) int8x16_t vqdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t vqdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s16))) int16x8_t vqdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t vqdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s32))) int32x4_t vqdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t vqdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhq_s8))) int8x16_t vqdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t vqdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s16))) int16x8_t vqdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t vqdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s32))) int32x4_t vqdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t vqdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_m_s8))) int8x16_t vqdmladhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t vqdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s16))) int16x8_t vqdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t vqdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s32))) int32x4_t vqdmladhxq(int32x4_t, int32x4_t, 
int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t vqdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmladhxq_s8))) int8x16_t vqdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t vqdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s16))) int16x8_t vqdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t vqdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s32))) int32x4_t vqdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t vqdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_m_n_s8))) int8x16_t vqdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t vqdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s16))) int16x8_t vqdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t vqdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s32))) int32x4_t vqdmlahq(int32x4_t, 
int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t vqdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlahq_n_s8))) int8x16_t vqdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t vqdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s16))) int16x8_t vqdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t vqdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s32))) int32x4_t vqdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t vqdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_m_n_s8))) int8x16_t vqdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t vqdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s16))) int16x8_t vqdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t vqdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s32))) int32x4_t 
vqdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t vqdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlashq_n_s8))) int8x16_t vqdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t vqdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s16))) int16x8_t vqdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t vqdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s32))) int32x4_t vqdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t vqdmlsdhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_m_s8))) int8x16_t vqdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t vqdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s16))) int16x8_t vqdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) int32x4_t vqdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s32))) 
int32x4_t vqdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t vqdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhq_s8))) int8x16_t vqdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t vqdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s16))) int16x8_t vqdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t vqdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s32))) int32x4_t vqdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t vqdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_m_s8))) int8x16_t vqdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t vqdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s16))) int16x8_t vqdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t vqdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s32))) int32x4_t vqdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t vqdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmlsdhxq_s8))) int8x16_t vqdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t vqdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s16))) int16x8_t vqdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t vqdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s32))) int32x4_t vqdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t vqdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_n_s8))) int8x16_t vqdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t vqdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s16))) int16x8_t vqdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t vqdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s32))) int32x4_t vqdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t vqdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_m_s8))) int8x16_t vqdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t vqdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s16))) int16x8_t vqdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t vqdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s32))) int32x4_t vqdmulhq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t vqdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_n_s8))) int8x16_t vqdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t vqdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s16))) int16x8_t vqdmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t vqdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s32))) int32x4_t vqdmulhq(int32x4_t, 
int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t vqdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulhq_s8))) int8x16_t vqdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t vqdmullbq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s16))) int32x4_t vqdmullbq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t vqdmullbq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_n_s32))) int64x2_t vqdmullbq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t vqdmullbq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s16))) int32x4_t vqdmullbq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t vqdmullbq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_m_s32))) int64x2_t vqdmullbq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t vqdmullbq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s16))) int32x4_t 
vqdmullbq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t vqdmullbq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_n_s32))) int64x2_t vqdmullbq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t vqdmullbq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s16))) int32x4_t vqdmullbq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t vqdmullbq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmullbq_s32))) int64x2_t vqdmullbq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t vqdmulltq_m_n_s16(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s16))) int32x4_t vqdmulltq_m(int32x4_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t vqdmulltq_m_n_s32(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_n_s32))) int64x2_t vqdmulltq_m(int64x2_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t vqdmulltq_m_s16(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s16))) int32x4_t vqdmulltq_m(int32x4_t, int16x8_t, int16x8_t, mve_pred16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t vqdmulltq_m_s32(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_m_s32))) int64x2_t vqdmulltq_m(int64x2_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t vqdmulltq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s16))) int32x4_t vqdmulltq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t vqdmulltq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_n_s32))) int64x2_t vqdmulltq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t vqdmulltq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s16))) int32x4_t vqdmulltq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t vqdmulltq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqdmulltq_s32))) int64x2_t vqdmulltq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t vqmovnbq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s16))) int8x16_t vqmovnbq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t vqmovnbq_m_s32(int16x8_t, 
int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_s32))) int16x8_t vqmovnbq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t vqmovnbq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u16))) uint8x16_t vqmovnbq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t vqmovnbq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_m_u32))) uint16x8_t vqmovnbq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t vqmovnbq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s16))) int8x16_t vqmovnbq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t vqmovnbq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_s32))) int16x8_t vqmovnbq(int16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t vqmovnbq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u16))) uint8x16_t vqmovnbq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) uint16x8_t vqmovnbq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovnbq_u32))) 
uint16x8_t vqmovnbq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t vqmovntq_m_s16(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s16))) int8x16_t vqmovntq_m(int8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t vqmovntq_m_s32(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_s32))) int16x8_t vqmovntq_m(int16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t vqmovntq_m_u16(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u16))) uint8x16_t vqmovntq_m(uint8x16_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t vqmovntq_m_u32(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_m_u32))) uint16x8_t vqmovntq_m(uint16x8_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t vqmovntq_s16(int8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s16))) int8x16_t vqmovntq(int8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t vqmovntq_s32(int16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_s32))) int16x8_t vqmovntq(int16x8_t, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t vqmovntq_u16(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u16))) uint8x16_t vqmovntq(uint8x16_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t vqmovntq_u32(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovntq_u32))) uint16x8_t vqmovntq(uint16x8_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t vqmovunbq_m_s16(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s16))) uint8x16_t vqmovunbq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t vqmovunbq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_m_s32))) uint16x8_t vqmovunbq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t vqmovunbq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s16))) uint8x16_t vqmovunbq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t vqmovunbq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovunbq_s32))) uint16x8_t vqmovunbq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t vqmovuntq_m_s16(uint8x16_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s16))) uint8x16_t vqmovuntq_m(uint8x16_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t vqmovuntq_m_s32(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_m_s32))) uint16x8_t vqmovuntq_m(uint16x8_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t vqmovuntq_s16(uint8x16_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s16))) uint8x16_t vqmovuntq(uint8x16_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t vqmovuntq_s32(uint16x8_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqmovuntq_s32))) uint16x8_t vqmovuntq(uint16x8_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t vqnegq_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s16))) int16x8_t vqnegq_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t vqnegq_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s32))) int32x4_t vqnegq_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t vqnegq_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_m_s8))) int8x16_t vqnegq_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t vqnegq_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s16))) int16x8_t vqnegq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t vqnegq_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s32))) int32x4_t vqnegq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t vqnegq_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqnegq_s8))) int8x16_t vqnegq(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t vqrdmladhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s16))) int16x8_t vqrdmladhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t vqrdmladhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s32))) int32x4_t vqrdmladhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t vqrdmladhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_m_s8))) int8x16_t vqrdmladhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t vqrdmladhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s16))) int16x8_t vqrdmladhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t vqrdmladhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s32))) int32x4_t vqrdmladhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t vqrdmladhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhq_s8))) int8x16_t vqrdmladhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t vqrdmladhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s16))) int16x8_t vqrdmladhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t vqrdmladhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s32))) int32x4_t vqrdmladhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t vqrdmladhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_m_s8))) int8x16_t vqrdmladhxq_m(int8x16_t, 
int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t vqrdmladhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s16))) int16x8_t vqrdmladhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t vqrdmladhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s32))) int32x4_t vqrdmladhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t vqrdmladhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmladhxq_s8))) int8x16_t vqrdmladhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t vqrdmlahq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s16))) int16x8_t vqrdmlahq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t vqrdmlahq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s32))) int32x4_t vqrdmlahq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t vqrdmlahq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_m_n_s8))) int8x16_t vqrdmlahq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t vqrdmlahq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s16))) int16x8_t vqrdmlahq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t vqrdmlahq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s32))) int32x4_t vqrdmlahq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t vqrdmlahq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlahq_n_s8))) int8x16_t vqrdmlahq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t vqrdmlashq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s16))) int16x8_t vqrdmlashq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t vqrdmlashq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s32))) int32x4_t vqrdmlashq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t vqrdmlashq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_m_n_s8))) int8x16_t vqrdmlashq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t vqrdmlashq_n_s16(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s16))) int16x8_t vqrdmlashq(int16x8_t, int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t vqrdmlashq_n_s32(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s32))) int32x4_t vqrdmlashq(int32x4_t, int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t vqrdmlashq_n_s8(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlashq_n_s8))) int8x16_t vqrdmlashq(int8x16_t, int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t vqrdmlsdhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s16))) int16x8_t vqrdmlsdhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t vqrdmlsdhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s32))) int32x4_t vqrdmlsdhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t vqrdmlsdhq_m_s8(int8x16_t, int8x16_t, 
int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_m_s8))) int8x16_t vqrdmlsdhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t vqrdmlsdhq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s16))) int16x8_t vqrdmlsdhq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t vqrdmlsdhq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s32))) int32x4_t vqrdmlsdhq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t vqrdmlsdhq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhq_s8))) int8x16_t vqrdmlsdhq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t vqrdmlsdhxq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s16))) int16x8_t vqrdmlsdhxq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t vqrdmlsdhxq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s32))) int32x4_t vqrdmlsdhxq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) 
int8x16_t vqrdmlsdhxq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_m_s8))) int8x16_t vqrdmlsdhxq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t vqrdmlsdhxq_s16(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s16))) int16x8_t vqrdmlsdhxq(int16x8_t, int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t vqrdmlsdhxq_s32(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s32))) int32x4_t vqrdmlsdhxq(int32x4_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t vqrdmlsdhxq_s8(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmlsdhxq_s8))) int8x16_t vqrdmlsdhxq(int8x16_t, int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t vqrdmulhq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s16))) int16x8_t vqrdmulhq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t vqrdmulhq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s32))) int32x4_t vqrdmulhq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t vqrdmulhq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_n_s8))) int8x16_t vqrdmulhq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t vqrdmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s16))) int16x8_t vqrdmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t vqrdmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s32))) int32x4_t vqrdmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t vqrdmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_m_s8))) int8x16_t vqrdmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t vqrdmulhq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s16))) int16x8_t vqrdmulhq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t vqrdmulhq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s32))) int32x4_t vqrdmulhq(int32x4_t, int32_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t vqrdmulhq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_n_s8))) int8x16_t vqrdmulhq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t vqrdmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s16))) int16x8_t vqrdmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t vqrdmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s32))) int32x4_t vqrdmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t vqrdmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrdmulhq_s8))) int8x16_t vqrdmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t vqrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s16))) int16x8_t vqrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t vqrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s32))) int32x4_t vqrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t vqrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_s8))) int8x16_t vqrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t vqrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u16))) uint16x8_t vqrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t vqrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u32))) uint32x4_t vqrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t vqrshlq_m_n_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_n_u8))) uint8x16_t vqrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t vqrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s16))) int16x8_t vqrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t vqrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s32))) int32x4_t vqrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t vqrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_s8))) int8x16_t vqrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t vqrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u16))) uint16x8_t vqrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t vqrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u32))) uint32x4_t vqrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t vqrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_m_u8))) uint8x16_t vqrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t vqrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s16))) int16x8_t vqrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t vqrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s32))) int32x4_t vqrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t vqrshlq_n_s8(int8x16_t, int32_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_s8))) int8x16_t vqrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t vqrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u16))) uint16x8_t vqrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t vqrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u32))) uint32x4_t vqrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t vqrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_n_u8))) uint8x16_t vqrshlq(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t vqrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s16))) int16x8_t vqrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t vqrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s32))) int32x4_t vqrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t vqrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_s8))) int8x16_t vqrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t vqrshlq_u16(uint16x8_t, 
int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u16))) uint16x8_t vqrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t vqrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u32))) uint32x4_t vqrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t vqrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshlq_u8))) uint8x16_t vqrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t vqrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s16))) int8x16_t vqrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t vqrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_s32))) int16x8_t vqrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t vqrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u16))) uint8x16_t vqrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t vqrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_m_n_u32))) uint16x8_t vqrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t vqrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s16))) int8x16_t vqrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t vqrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_s32))) int16x8_t vqrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t vqrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u16))) uint8x16_t vqrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t vqrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrnbq_n_u32))) uint16x8_t vqrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t vqrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s16))) int8x16_t vqrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t vqrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_s32))) int16x8_t vqrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t vqrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u16))) uint8x16_t vqrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t vqrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_m_n_u32))) uint16x8_t vqrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t vqrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s16))) int8x16_t vqrshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t vqrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_s32))) int16x8_t vqrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t vqrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u16))) uint8x16_t vqrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t vqrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrntq_n_u32))) uint16x8_t vqrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t vqrshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s16))) uint8x16_t vqrshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t vqrshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_m_n_s32))) uint16x8_t vqrshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t vqrshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s16))) uint8x16_t vqrshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t vqrshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshrunbq_n_s32))) uint16x8_t vqrshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t vqrshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s16))) uint8x16_t vqrshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t vqrshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_m_n_s32))) uint16x8_t vqrshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t vqrshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s16))) uint8x16_t vqrshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t vqrshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqrshruntq_n_s32))) uint16x8_t vqrshruntq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t vqshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s16))) int16x8_t vqshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t vqshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s32))) int32x4_t vqshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t vqshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_s8))) int8x16_t vqshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t vqshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u16))) uint16x8_t vqshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t vqshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u32))) uint32x4_t vqshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t vqshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_n_u8))) uint8x16_t vqshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t vqshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s16))) int16x8_t vqshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t vqshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s32))) int32x4_t vqshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t vqshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_s8))) int8x16_t vqshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t vqshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u16))) uint16x8_t vqshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t vqshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u32))) uint32x4_t vqshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t vqshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_r_u8))) uint8x16_t vqshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t vqshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s16))) int16x8_t vqshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t vqshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s32))) int32x4_t vqshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t vqshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_s8))) int8x16_t vqshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t vqshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u16))) uint16x8_t vqshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t vqshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u32))) uint32x4_t vqshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t vqshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_m_u8))) uint8x16_t vqshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t vqshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s16))) int16x8_t vqshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t vqshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s32))) int32x4_t vqshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t vqshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_s8))) int8x16_t vqshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t vqshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u16))) uint16x8_t 
vqshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t vqshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u32))) uint32x4_t vqshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t vqshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_n_u8))) uint8x16_t vqshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t vqshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s16))) int16x8_t vqshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t vqshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s32))) int32x4_t vqshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t vqshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_s8))) int8x16_t vqshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t vqshlq_r_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u16))) uint16x8_t vqshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) uint32x4_t vqshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u32))) 
uint32x4_t vqshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t vqshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_r_u8))) uint8x16_t vqshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t vqshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s16))) int16x8_t vqshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t vqshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s32))) int32x4_t vqshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t vqshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_s8))) int8x16_t vqshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t vqshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u16))) uint16x8_t vqshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t vqshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u32))) uint32x4_t vqshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) uint8x16_t vqshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshlq_u8))) 
uint8x16_t vqshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t vqshluq_m_n_s16(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s16))) uint16x8_t vqshluq_m(uint16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t vqshluq_m_n_s32(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s32))) uint32x4_t vqshluq_m(uint32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t vqshluq_m_n_s8(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_m_n_s8))) uint8x16_t vqshluq_m(uint8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t vqshluq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s16))) uint16x8_t vqshluq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t vqshluq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s32))) uint32x4_t vqshluq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t vqshluq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshluq_n_s8))) uint8x16_t vqshluq(int8x16_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t vqshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s16))) int8x16_t vqshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t vqshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_s32))) int16x8_t vqshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t vqshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u16))) uint8x16_t vqshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t vqshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_m_n_u32))) uint16x8_t vqshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t vqshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s16))) int8x16_t vqshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t vqshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_s32))) int16x8_t vqshrnbq(int16x8_t, int32x4_t, int); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t vqshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u16))) uint8x16_t vqshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t vqshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrnbq_n_u32))) uint16x8_t vqshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t vqshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s16))) int8x16_t vqshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t vqshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_s32))) int16x8_t vqshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t vqshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u16))) uint8x16_t vqshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t vqshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_m_n_u32))) uint16x8_t vqshrntq_m(uint16x8_t, uint32x4_t, int, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t vqshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s16))) int8x16_t vqshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t vqshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_s32))) int16x8_t vqshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t vqshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u16))) uint8x16_t vqshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t vqshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrntq_n_u32))) uint16x8_t vqshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t vqshrunbq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s16))) uint8x16_t vqshrunbq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t vqshrunbq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_m_n_s32))) uint16x8_t vqshrunbq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t vqshrunbq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s16))) uint8x16_t vqshrunbq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t vqshrunbq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshrunbq_n_s32))) uint16x8_t vqshrunbq(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t vqshruntq_m_n_s16(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s16))) uint8x16_t vqshruntq_m(uint8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t vqshruntq_m_n_s32(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_m_n_s32))) uint16x8_t vqshruntq_m(uint16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t vqshruntq_n_s16(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s16))) uint8x16_t vqshruntq(uint8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t vqshruntq_n_s32(uint16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqshruntq_n_s32))) uint16x8_t vqshruntq(uint16x8_t, int32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t vqsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s16))) int16x8_t vqsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t vqsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s32))) int32x4_t vqsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t vqsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_s8))) int8x16_t vqsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t vqsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u16))) uint16x8_t vqsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t vqsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u32))) uint32x4_t vqsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) uint8x16_t vqsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_n_u8))) 
uint8x16_t vqsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t vqsubq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s16))) int16x8_t vqsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t vqsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s32))) int32x4_t vqsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t vqsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_s8))) int8x16_t vqsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t vqsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u16))) uint16x8_t vqsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t vqsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u32))) uint32x4_t vqsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t vqsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_m_u8))) uint8x16_t vqsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t vqsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s16))) int16x8_t vqsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t vqsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s32))) int32x4_t vqsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t vqsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_s8))) int8x16_t vqsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t vqsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u16))) uint16x8_t vqsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t vqsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u32))) uint32x4_t vqsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t vqsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_n_u8))) uint8x16_t vqsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t 
vqsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s16))) int16x8_t vqsubq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t vqsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s32))) int32x4_t vqsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t vqsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_s8))) int8x16_t vqsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t vqsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u16))) uint16x8_t vqsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t vqsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u32))) uint32x4_t vqsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t vqsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vqsubq_u8))) uint8x16_t vqsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t vreinterpretq_s16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s32))) int16x8_t vreinterpretq_s16(int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t vreinterpretq_s16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s64))) int16x8_t vreinterpretq_s16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t vreinterpretq_s16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_s8))) int16x8_t vreinterpretq_s16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t vreinterpretq_s16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u16))) int16x8_t vreinterpretq_s16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t vreinterpretq_s16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u32))) int16x8_t vreinterpretq_s16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t vreinterpretq_s16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u64))) int16x8_t vreinterpretq_s16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t vreinterpretq_s16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_u8))) int16x8_t vreinterpretq_s16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t vreinterpretq_s32_s16(int16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s16))) int32x4_t vreinterpretq_s32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t vreinterpretq_s32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s64))) int32x4_t vreinterpretq_s32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t vreinterpretq_s32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_s8))) int32x4_t vreinterpretq_s32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t vreinterpretq_s32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u16))) int32x4_t vreinterpretq_s32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t vreinterpretq_s32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u32))) int32x4_t vreinterpretq_s32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t vreinterpretq_s32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u64))) int32x4_t vreinterpretq_s32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t vreinterpretq_s32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_u8))) int32x4_t vreinterpretq_s32(uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t vreinterpretq_s64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s16))) int64x2_t vreinterpretq_s64(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t vreinterpretq_s64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s32))) int64x2_t vreinterpretq_s64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t vreinterpretq_s64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_s8))) int64x2_t vreinterpretq_s64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t vreinterpretq_s64_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u16))) int64x2_t vreinterpretq_s64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t vreinterpretq_s64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u32))) int64x2_t vreinterpretq_s64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t vreinterpretq_s64_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u64))) int64x2_t vreinterpretq_s64(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t vreinterpretq_s64_u8(uint8x16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_u8))) int64x2_t vreinterpretq_s64(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t vreinterpretq_s8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s16))) int8x16_t vreinterpretq_s8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t vreinterpretq_s8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s32))) int8x16_t vreinterpretq_s8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t vreinterpretq_s8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_s64))) int8x16_t vreinterpretq_s8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t vreinterpretq_s8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u16))) int8x16_t vreinterpretq_s8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t vreinterpretq_s8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u32))) int8x16_t vreinterpretq_s8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t vreinterpretq_s8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u64))) int8x16_t vreinterpretq_s8(uint64x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t vreinterpretq_s8_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_u8))) int8x16_t vreinterpretq_s8(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t vreinterpretq_u16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s16))) uint16x8_t vreinterpretq_u16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t vreinterpretq_u16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s32))) uint16x8_t vreinterpretq_u16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t vreinterpretq_u16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s64))) uint16x8_t vreinterpretq_u16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t vreinterpretq_u16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_s8))) uint16x8_t vreinterpretq_u16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t vreinterpretq_u16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u32))) uint16x8_t vreinterpretq_u16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t vreinterpretq_u16_u64(uint64x2_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u64))) uint16x8_t vreinterpretq_u16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t vreinterpretq_u16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_u8))) uint16x8_t vreinterpretq_u16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t vreinterpretq_u32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s16))) uint32x4_t vreinterpretq_u32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t vreinterpretq_u32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s32))) uint32x4_t vreinterpretq_u32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t vreinterpretq_u32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s64))) uint32x4_t vreinterpretq_u32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t vreinterpretq_u32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_s8))) uint32x4_t vreinterpretq_u32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t vreinterpretq_u32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u16))) uint32x4_t vreinterpretq_u32(uint16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t vreinterpretq_u32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u64))) uint32x4_t vreinterpretq_u32(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t vreinterpretq_u32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_u8))) uint32x4_t vreinterpretq_u32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t vreinterpretq_u64_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s16))) uint64x2_t vreinterpretq_u64(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t vreinterpretq_u64_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s32))) uint64x2_t vreinterpretq_u64(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t vreinterpretq_u64_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s64))) uint64x2_t vreinterpretq_u64(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t vreinterpretq_u64_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_s8))) uint64x2_t vreinterpretq_u64(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t vreinterpretq_u64_u16(uint16x8_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u16))) uint64x2_t vreinterpretq_u64(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t vreinterpretq_u64_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u32))) uint64x2_t vreinterpretq_u64(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t vreinterpretq_u64_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_u8))) uint64x2_t vreinterpretq_u64(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t vreinterpretq_u8_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s16))) uint8x16_t vreinterpretq_u8(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t vreinterpretq_u8_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s32))) uint8x16_t vreinterpretq_u8(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t vreinterpretq_u8_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s64))) uint8x16_t vreinterpretq_u8(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t vreinterpretq_u8_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_s8))) uint8x16_t vreinterpretq_u8(int8x16_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t vreinterpretq_u8_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u16))) uint8x16_t vreinterpretq_u8(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t vreinterpretq_u8_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u32))) uint8x16_t vreinterpretq_u8(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t vreinterpretq_u8_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_u64))) uint8x16_t vreinterpretq_u8(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t vrev16q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_s8))) int8x16_t vrev16q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t vrev16q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_m_u8))) uint8x16_t vrev16q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t vrev16q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_s8))) int8x16_t vrev16q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t vrev16q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_u8))) uint8x16_t vrev16q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t vrev16q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_s8))) int8x16_t vrev16q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t vrev16q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev16q_x_u8))) uint8x16_t vrev16q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t vrev32q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s16))) int16x8_t vrev32q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t vrev32q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_s8))) int8x16_t vrev32q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t vrev32q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u16))) uint16x8_t vrev32q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t vrev32q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_u8))) uint8x16_t vrev32q_m(uint8x16_t, uint8x16_t, mve_pred16_t); 
static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t vrev32q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s16))) int16x8_t vrev32q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t vrev32q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_s8))) int8x16_t vrev32q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t vrev32q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u16))) uint16x8_t vrev32q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t vrev32q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_u8))) uint8x16_t vrev32q(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t vrev32q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s16))) int16x8_t vrev32q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t vrev32q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_s8))) int8x16_t vrev32q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t vrev32q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u16))) uint16x8_t vrev32q_x(uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t vrev32q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_u8))) uint8x16_t vrev32q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t vrev64q_m_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s16))) int16x8_t vrev64q_m(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t vrev64q_m_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s32))) int32x4_t vrev64q_m(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t vrev64q_m_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_s8))) int8x16_t vrev64q_m(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t vrev64q_m_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u16))) uint16x8_t vrev64q_m(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t vrev64q_m_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u32))) uint32x4_t vrev64q_m(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t vrev64q_m_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_u8))) uint8x16_t vrev64q_m(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t vrev64q_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s16))) int16x8_t vrev64q(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t vrev64q_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s32))) int32x4_t vrev64q(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t vrev64q_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_s8))) int8x16_t vrev64q(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t vrev64q_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u16))) uint16x8_t vrev64q(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t vrev64q_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u32))) uint32x4_t vrev64q(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t vrev64q_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_u8))) uint8x16_t vrev64q(uint8x16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t vrev64q_x_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s16))) int16x8_t vrev64q_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t vrev64q_x_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s32))) int32x4_t vrev64q_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t vrev64q_x_s8(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_s8))) int8x16_t vrev64q_x(int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t vrev64q_x_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u16))) uint16x8_t vrev64q_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t vrev64q_x_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u32))) uint32x4_t vrev64q_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t vrev64q_x_u8(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_u8))) uint8x16_t vrev64q_x(uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t vrhaddq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s16))) int16x8_t vrhaddq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t vrhaddq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s32))) int32x4_t vrhaddq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t vrhaddq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_s8))) int8x16_t vrhaddq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t vrhaddq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u16))) uint16x8_t vrhaddq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t vrhaddq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u32))) uint32x4_t vrhaddq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t vrhaddq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_m_u8))) uint8x16_t vrhaddq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t vrhaddq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s16))) int16x8_t vrhaddq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t vrhaddq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s32))) int32x4_t vrhaddq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t vrhaddq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_s8))) int8x16_t vrhaddq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t vrhaddq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u16))) uint16x8_t vrhaddq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t vrhaddq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u32))) uint32x4_t vrhaddq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t vrhaddq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_u8))) uint8x16_t vrhaddq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t vrhaddq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s16))) int16x8_t 
vrhaddq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t vrhaddq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s32))) int32x4_t vrhaddq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t vrhaddq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_s8))) int8x16_t vrhaddq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t vrhaddq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u16))) uint16x8_t vrhaddq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t vrhaddq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u32))) uint32x4_t vrhaddq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t vrhaddq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrhaddq_x_u8))) uint8x16_t vrhaddq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t vrmlaldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_s32))) int64_t vrmlaldavhaq_p(int64_t, 
int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t vrmlaldavhaq_p_u32(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_p_u32))) uint64_t vrmlaldavhaq_p(uint64_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t vrmlaldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_s32))) int64_t vrmlaldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t vrmlaldavhaq_u32(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaq_u32))) uint64_t vrmlaldavhaq(uint64_t, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t vrmlaldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_p_s32))) int64_t vrmlaldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t vrmlaldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhaxq_s32))) int64_t vrmlaldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t vrmlaldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_s32))) int64_t vrmlaldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t vrmlaldavhq_p_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_p_u32))) uint64_t vrmlaldavhq_p(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t vrmlaldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_s32))) int64_t vrmlaldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t vrmlaldavhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhq_u32))) uint64_t vrmlaldavhq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t vrmlaldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_p_s32))) int64_t vrmlaldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t vrmlaldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlaldavhxq_s32))) int64_t vrmlaldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t vrmlsldavhaq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_p_s32))) int64_t vrmlsldavhaq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t vrmlsldavhaq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaq_s32))) int64_t vrmlsldavhaq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t vrmlsldavhaxq_p_s32(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_p_s32))) int64_t vrmlsldavhaxq_p(int64_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t vrmlsldavhaxq_s32(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhaxq_s32))) int64_t vrmlsldavhaxq(int64_t, int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t vrmlsldavhq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_p_s32))) int64_t vrmlsldavhq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t vrmlsldavhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhq_s32))) int64_t vrmlsldavhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t vrmlsldavhxq_p_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_p_s32))) int64_t vrmlsldavhxq_p(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t vrmlsldavhxq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmlsldavhxq_s32))) int64_t vrmlsldavhxq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t vrmulhq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s16))) int16x8_t vrmulhq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t vrmulhq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s32))) int32x4_t vrmulhq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t vrmulhq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_s8))) int8x16_t vrmulhq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t vrmulhq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u16))) uint16x8_t vrmulhq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t vrmulhq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u32))) uint32x4_t vrmulhq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t vrmulhq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_m_u8))) uint8x16_t vrmulhq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t vrmulhq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s16))) int16x8_t vrmulhq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t vrmulhq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s32))) int32x4_t vrmulhq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t vrmulhq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_s8))) int8x16_t vrmulhq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t vrmulhq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u16))) uint16x8_t vrmulhq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t vrmulhq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u32))) uint32x4_t vrmulhq(uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t vrmulhq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_u8))) uint8x16_t vrmulhq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t vrmulhq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s16))) int16x8_t vrmulhq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t vrmulhq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s32))) int32x4_t vrmulhq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t vrmulhq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_s8))) int8x16_t vrmulhq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t vrmulhq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u16))) uint16x8_t vrmulhq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t vrmulhq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u32))) uint32x4_t vrmulhq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) 
uint8x16_t vrmulhq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrmulhq_x_u8))) uint8x16_t vrmulhq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t vrshlq_m_n_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s16))) int16x8_t vrshlq_m_n(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t vrshlq_m_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s32))) int32x4_t vrshlq_m_n(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t vrshlq_m_n_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_s8))) int8x16_t vrshlq_m_n(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t vrshlq_m_n_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u16))) uint16x8_t vrshlq_m_n(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t vrshlq_m_n_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u32))) uint32x4_t vrshlq_m_n(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t vrshlq_m_n_u8(uint8x16_t, int32_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_n_u8))) uint8x16_t vrshlq_m_n(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t vrshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s16))) int16x8_t vrshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t vrshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s32))) int32x4_t vrshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t vrshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_s8))) int8x16_t vrshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t vrshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u16))) uint16x8_t vrshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t vrshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u32))) uint32x4_t vrshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) 
uint8x16_t vrshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_m_u8))) uint8x16_t vrshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t vrshlq_n_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s16))) int16x8_t vrshlq(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t vrshlq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s32))) int32x4_t vrshlq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t vrshlq_n_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_s8))) int8x16_t vrshlq(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t vrshlq_n_u16(uint16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u16))) uint16x8_t vrshlq(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t vrshlq_n_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u32))) uint32x4_t vrshlq(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t vrshlq_n_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_n_u8))) uint8x16_t vrshlq(uint8x16_t, int32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t vrshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s16))) int16x8_t vrshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t vrshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s32))) int32x4_t vrshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t vrshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_s8))) int8x16_t vrshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t vrshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u16))) uint16x8_t vrshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t vrshlq_u32(uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u32))) uint32x4_t vrshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t vrshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_u8))) uint8x16_t vrshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t vrshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s16))) int16x8_t vrshlq_x(int16x8_t, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t vrshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s32))) int32x4_t vrshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t vrshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_s8))) int8x16_t vrshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t vrshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u16))) uint16x8_t vrshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t vrshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u32))) uint32x4_t vrshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t vrshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshlq_x_u8))) uint8x16_t vrshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t vrshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s16))) int8x16_t vrshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t vrshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_s32))) int16x8_t vrshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t vrshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u16))) uint8x16_t vrshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t vrshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_m_n_u32))) uint16x8_t vrshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t vrshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s16))) int8x16_t vrshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t vrshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_s32))) int16x8_t vrshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t vrshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u16))) uint8x16_t vrshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t vrshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrnbq_n_u32))) uint16x8_t vrshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t vrshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s16))) int8x16_t vrshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t vrshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_s32))) int16x8_t vrshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t vrshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u16))) uint8x16_t vrshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t vrshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_m_n_u32))) uint16x8_t vrshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t vrshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s16))) int8x16_t vrshrntq(int8x16_t, int16x8_t, int); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t vrshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_s32))) int16x8_t vrshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t vrshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u16))) uint8x16_t vrshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t vrshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrntq_n_u32))) uint16x8_t vrshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t vrshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s16))) int16x8_t vrshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t vrshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s32))) int32x4_t vrshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t vrshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_s8))) int8x16_t vrshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t vrshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u16))) uint16x8_t vrshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t vrshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u32))) uint32x4_t vrshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t vrshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_m_n_u8))) uint8x16_t vrshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t vrshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s16))) int16x8_t vrshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t vrshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s32))) int32x4_t vrshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t vrshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_s8))) int8x16_t vrshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t vrshrq_n_u16(uint16x8_t, int); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u16))) uint16x8_t vrshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t vrshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u32))) uint32x4_t vrshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t vrshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_n_u8))) uint8x16_t vrshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t vrshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s16))) int16x8_t vrshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t vrshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s32))) int32x4_t vrshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t vrshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_s8))) int8x16_t vrshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t vrshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u16))) uint16x8_t vrshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t vrshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u32))) uint32x4_t vrshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t vrshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrshrq_x_n_u8))) uint8x16_t vrshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t vsbciq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_s32))) int32x4_t vsbciq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t vsbciq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_m_u32))) uint32x4_t vsbciq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t vsbciq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_s32))) int32x4_t vsbciq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t vsbciq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbciq_u32))) uint32x4_t vsbciq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t vsbcq_m_s32(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_s32))) int32x4_t vsbcq_m(int32x4_t, int32x4_t, int32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t vsbcq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_m_u32))) uint32x4_t vsbcq_m(uint32x4_t, uint32x4_t, uint32x4_t, unsigned *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t vsbcq_s32(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_s32))) int32x4_t vsbcq(int32x4_t, int32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t vsbcq_u32(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsbcq_u32))) uint32x4_t vsbcq(uint32x4_t, uint32x4_t, unsigned *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t vsetq_lane_s16(int16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s16))) int16x8_t vsetq_lane(int16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t vsetq_lane_s32(int32_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s32))) int32x4_t vsetq_lane(int32_t, int32x4_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t vsetq_lane_s64(int64_t, int64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s64))) int64x2_t vsetq_lane(int64_t, int64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t vsetq_lane_s8(int8_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_s8))) int8x16_t vsetq_lane(int8_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t vsetq_lane_u16(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u16))) uint16x8_t vsetq_lane(uint16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t vsetq_lane_u32(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u32))) uint32x4_t vsetq_lane(uint32_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t vsetq_lane_u64(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u64))) uint64x2_t vsetq_lane(uint64_t, uint64x2_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t vsetq_lane_u8(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_u8))) uint8x16_t vsetq_lane(uint8_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t vshlcq_m_s16(int16x8_t, uint32_t *, int, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s16))) int16x8_t vshlcq_m(int16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t vshlcq_m_s32(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s32))) int32x4_t vshlcq_m(int32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t vshlcq_m_s8(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_s8))) int8x16_t vshlcq_m(int8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t vshlcq_m_u16(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u16))) uint16x8_t vshlcq_m(uint16x8_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t vshlcq_m_u32(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u32))) uint32x4_t vshlcq_m(uint32x4_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t vshlcq_m_u8(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_m_u8))) uint8x16_t vshlcq_m(uint8x16_t, uint32_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t vshlcq_s16(int16x8_t, uint32_t *, int); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s16))) int16x8_t vshlcq(int16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t vshlcq_s32(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s32))) int32x4_t vshlcq(int32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t vshlcq_s8(int8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_s8))) int8x16_t vshlcq(int8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t vshlcq_u16(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u16))) uint16x8_t vshlcq(uint16x8_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t vshlcq_u32(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u32))) uint32x4_t vshlcq(uint32x4_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t vshlcq_u8(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlcq_u8))) uint8x16_t vshlcq(uint8x16_t, uint32_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t vshllbq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s16))) int32x4_t vshllbq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t vshllbq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_s8))) int16x8_t vshllbq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t vshllbq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u16))) uint32x4_t vshllbq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t vshllbq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_m_n_u8))) uint16x8_t vshllbq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t vshllbq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s16))) int32x4_t vshllbq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t vshllbq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_s8))) int16x8_t vshllbq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t vshllbq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u16))) uint32x4_t vshllbq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t vshllbq_n_u8(uint8x16_t, int); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_n_u8))) uint16x8_t vshllbq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t vshllbq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s16))) int32x4_t vshllbq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t vshllbq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_s8))) int16x8_t vshllbq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t vshllbq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u16))) uint32x4_t vshllbq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t vshllbq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshllbq_x_n_u8))) uint16x8_t vshllbq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t vshlltq_m_n_s16(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s16))) int32x4_t vshlltq_m(int32x4_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t vshlltq_m_n_s8(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_s8))) int16x8_t vshlltq_m(int16x8_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t vshlltq_m_n_u16(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u16))) uint32x4_t vshlltq_m(uint32x4_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t vshlltq_m_n_u8(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_m_n_u8))) uint16x8_t vshlltq_m(uint16x8_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t vshlltq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s16))) int32x4_t vshlltq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t vshlltq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_s8))) int16x8_t vshlltq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t vshlltq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u16))) uint32x4_t vshlltq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t vshlltq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_n_u8))) uint16x8_t vshlltq(uint8x16_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t vshlltq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s16))) int32x4_t vshlltq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t vshlltq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_s8))) int16x8_t vshlltq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t vshlltq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u16))) uint32x4_t vshlltq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t vshlltq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlltq_x_n_u8))) uint16x8_t vshlltq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t vshlq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s16))) int16x8_t vshlq_m_n(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t vshlq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s32))) int32x4_t vshlq_m_n(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t vshlq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_s8))) int8x16_t vshlq_m_n(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t vshlq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u16))) uint16x8_t vshlq_m_n(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t vshlq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u32))) uint32x4_t vshlq_m_n(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t vshlq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_n_u8))) uint8x16_t vshlq_m_n(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t vshlq_m_r_s16(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s16))) int16x8_t vshlq_m_r(int16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t vshlq_m_r_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s32))) int32x4_t vshlq_m_r(int32x4_t, int32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t vshlq_m_r_s8(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_s8))) int8x16_t vshlq_m_r(int8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t vshlq_m_r_u16(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u16))) uint16x8_t vshlq_m_r(uint16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t vshlq_m_r_u32(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u32))) uint32x4_t vshlq_m_r(uint32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t vshlq_m_r_u8(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_r_u8))) uint8x16_t vshlq_m_r(uint8x16_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t vshlq_m_s16(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s16))) int16x8_t vshlq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t vshlq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s32))) int32x4_t vshlq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t vshlq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_s8))) int8x16_t vshlq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t vshlq_m_u16(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u16))) uint16x8_t vshlq_m(uint16x8_t, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t vshlq_m_u32(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u32))) uint32x4_t vshlq_m(uint32x4_t, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t vshlq_m_u8(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_m_u8))) uint8x16_t vshlq_m(uint8x16_t, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t vshlq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s16))) int16x8_t vshlq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t vshlq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s32))) int32x4_t vshlq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t 
vshlq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_s8))) int8x16_t vshlq_n(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t vshlq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u16))) uint16x8_t vshlq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t vshlq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u32))) uint32x4_t vshlq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t vshlq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_n_u8))) uint8x16_t vshlq_n(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t vshlq_r_s16(int16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s16))) int16x8_t vshlq_r(int16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t vshlq_r_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s32))) int32x4_t vshlq_r(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t vshlq_r_s8(int8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_s8))) int8x16_t vshlq_r(int8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t vshlq_r_u16(uint16x8_t, int32_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u16))) uint16x8_t vshlq_r(uint16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t vshlq_r_u32(uint32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u32))) uint32x4_t vshlq_r(uint32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t vshlq_r_u8(uint8x16_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_r_u8))) uint8x16_t vshlq_r(uint8x16_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t vshlq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s16))) int16x8_t vshlq(int16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t vshlq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s32))) int32x4_t vshlq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t vshlq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_s8))) int8x16_t vshlq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t vshlq_u16(uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u16))) uint16x8_t vshlq(uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t vshlq_u32(uint32x4_t, int32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u32))) uint32x4_t vshlq(uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t vshlq_u8(uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_u8))) uint8x16_t vshlq(uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t vshlq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s16))) int16x8_t vshlq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t vshlq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s32))) int32x4_t vshlq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t vshlq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_s8))) int8x16_t vshlq_x_n(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t vshlq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u16))) uint16x8_t vshlq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t vshlq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u32))) uint32x4_t vshlq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t vshlq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_n_u8))) uint8x16_t vshlq_x_n(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t vshlq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s16))) int16x8_t vshlq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t vshlq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s32))) int32x4_t vshlq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t vshlq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_s8))) int8x16_t vshlq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t vshlq_x_u16(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u16))) uint16x8_t vshlq_x(uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t vshlq_x_u32(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u32))) uint32x4_t vshlq_x(uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t 
vshlq_x_u8(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshlq_x_u8))) uint8x16_t vshlq_x(uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t vshrnbq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s16))) int8x16_t vshrnbq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t vshrnbq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_s32))) int16x8_t vshrnbq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t vshrnbq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u16))) uint8x16_t vshrnbq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t vshrnbq_m_n_u32(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_m_n_u32))) uint16x8_t vshrnbq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t vshrnbq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s16))) int8x16_t vshrnbq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t 
vshrnbq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_s32))) int16x8_t vshrnbq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t vshrnbq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u16))) uint8x16_t vshrnbq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t vshrnbq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrnbq_n_u32))) uint16x8_t vshrnbq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t vshrntq_m_n_s16(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s16))) int8x16_t vshrntq_m(int8x16_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t vshrntq_m_n_s32(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_s32))) int16x8_t vshrntq_m(int16x8_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t vshrntq_m_n_u16(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u16))) uint8x16_t vshrntq_m(uint8x16_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t vshrntq_m_n_u32(uint16x8_t, uint32x4_t, int, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_m_n_u32))) uint16x8_t vshrntq_m(uint16x8_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t vshrntq_n_s16(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s16))) int8x16_t vshrntq(int8x16_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t vshrntq_n_s32(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_s32))) int16x8_t vshrntq(int16x8_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t vshrntq_n_u16(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u16))) uint8x16_t vshrntq(uint8x16_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t vshrntq_n_u32(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrntq_n_u32))) uint16x8_t vshrntq(uint16x8_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t vshrq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s16))) int16x8_t vshrq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t vshrq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s32))) int32x4_t vshrq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t vshrq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_s8))) int8x16_t vshrq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t vshrq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u16))) uint16x8_t vshrq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t vshrq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u32))) uint32x4_t vshrq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t vshrq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_m_n_u8))) uint8x16_t vshrq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t vshrq_n_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s16))) int16x8_t vshrq(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t vshrq_n_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s32))) int32x4_t 
vshrq(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t vshrq_n_s8(int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_s8))) int8x16_t vshrq(int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t vshrq_n_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u16))) uint16x8_t vshrq(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t vshrq_n_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u32))) uint32x4_t vshrq(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t vshrq_n_u8(uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_n_u8))) uint8x16_t vshrq(uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t vshrq_x_n_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s16))) int16x8_t vshrq_x(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t vshrq_x_n_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s32))) int32x4_t vshrq_x(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t vshrq_x_n_s8(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_s8))) int8x16_t vshrq_x(int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t vshrq_x_n_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u16))) uint16x8_t vshrq_x(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t vshrq_x_n_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u32))) uint32x4_t vshrq_x(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t vshrq_x_n_u8(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vshrq_x_n_u8))) uint8x16_t vshrq_x(uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t vsliq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s16))) int16x8_t vsliq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t vsliq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s32))) int32x4_t vsliq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t vsliq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_s8))) int8x16_t 
vsliq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t vsliq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u16))) uint16x8_t vsliq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t vsliq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u32))) uint32x4_t vsliq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t vsliq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_m_n_u8))) uint8x16_t vsliq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t vsliq_n_s16(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s16))) int16x8_t vsliq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t vsliq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s32))) int32x4_t vsliq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t vsliq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_s8))) int8x16_t vsliq(int8x16_t, int8x16_t, int); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t vsliq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u16))) uint16x8_t vsliq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t vsliq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u32))) uint32x4_t vsliq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t vsliq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsliq_n_u8))) uint8x16_t vsliq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t vsriq_m_n_s16(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s16))) int16x8_t vsriq_m(int16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t vsriq_m_n_s32(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s32))) int32x4_t vsriq_m(int32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t vsriq_m_n_s8(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_s8))) int8x16_t vsriq_m(int8x16_t, int8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t 
vsriq_m_n_u16(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u16))) uint16x8_t vsriq_m(uint16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t vsriq_m_n_u32(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u32))) uint32x4_t vsriq_m(uint32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t vsriq_m_n_u8(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_m_n_u8))) uint8x16_t vsriq_m(uint8x16_t, uint8x16_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t vsriq_n_s16(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s16))) int16x8_t vsriq(int16x8_t, int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t vsriq_n_s32(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s32))) int32x4_t vsriq(int32x4_t, int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t vsriq_n_s8(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_s8))) int8x16_t vsriq(int8x16_t, int8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t vsriq_n_u16(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u16))) uint16x8_t vsriq(uint16x8_t, uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t vsriq_n_u32(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u32))) uint32x4_t vsriq(uint32x4_t, uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t vsriq_n_u8(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsriq_n_u8))) uint8x16_t vsriq(uint8x16_t, uint8x16_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void vst1q_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s16))) void vst1q_p(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void vst1q_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s32))) void vst1q_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void vst1q_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_s8))) void vst1q_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void vst1q_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u16))) void vst1q_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void vst1q_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u32))) void vst1q_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void vst1q_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_u8))) void vst1q_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void vst1q_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s16))) void vst1q(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void vst1q_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s32))) void vst1q(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void vst1q_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_s8))) void vst1q(int8_t *, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void vst1q_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u16))) void vst1q(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void vst1q_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u32))) void vst1q(uint32_t *, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void vst1q_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_u8))) void vst1q(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void vst2q_s16(int16_t *, int16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s16))) void vst2q(int16_t *, int16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void vst2q_s32(int32_t *, int32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s32))) void vst2q(int32_t *, int32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void vst2q_s8(int8_t *, int8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_s8))) void vst2q(int8_t *, int8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void vst2q_u16(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u16))) void vst2q(uint16_t *, uint16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void vst2q_u32(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u32))) void vst2q(uint32_t *, uint32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void vst2q_u8(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_u8))) void vst2q(uint8_t *, uint8x16x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void 
vst4q_s16(int16_t *, int16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s16))) void vst4q(int16_t *, int16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void vst4q_s32(int32_t *, int32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s32))) void vst4q(int32_t *, int32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void vst4q_s8(int8_t *, int8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_s8))) void vst4q(int8_t *, int8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void vst4q_u16(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u16))) void vst4q(uint16_t *, uint16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void vst4q_u32(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u32))) void vst4q(uint32_t *, uint32x4x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void vst4q_u8(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_u8))) void vst4q(uint8_t *, uint8x16x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void vstrbq_p_s16(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s16))) void vstrbq_p(int8_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void vstrbq_p_s32(int8_t *, int32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s32))) void vstrbq_p(int8_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void vstrbq_p_s8(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_s8))) void vstrbq_p(int8_t *, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void vstrbq_p_u16(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u16))) void vstrbq_p(uint8_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void vstrbq_p_u32(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u32))) void vstrbq_p(uint8_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void vstrbq_p_u8(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_p_u8))) void vstrbq_p(uint8_t *, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void vstrbq_s16(int8_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s16))) void vstrbq(int8_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void vstrbq_s32(int8_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s32))) void vstrbq(int8_t *, int32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void vstrbq_s8(int8_t *, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_s8))) void vstrbq(int8_t *, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void vstrbq_scatter_offset_p_s16(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s16))) void vstrbq_scatter_offset_p(int8_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void vstrbq_scatter_offset_p_s32(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s32))) void vstrbq_scatter_offset_p(int8_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void vstrbq_scatter_offset_p_s8(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_s8))) void vstrbq_scatter_offset_p(int8_t *, uint8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void vstrbq_scatter_offset_p_u16(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u16))) void vstrbq_scatter_offset_p(uint8_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void vstrbq_scatter_offset_p_u32(uint8_t *, uint32x4_t, uint32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u32))) void vstrbq_scatter_offset_p(uint8_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void vstrbq_scatter_offset_p_u8(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_p_u8))) void vstrbq_scatter_offset_p(uint8_t *, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void vstrbq_scatter_offset_s16(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s16))) void vstrbq_scatter_offset(int8_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void vstrbq_scatter_offset_s32(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s32))) void vstrbq_scatter_offset(int8_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void vstrbq_scatter_offset_s8(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_s8))) void vstrbq_scatter_offset(int8_t *, uint8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void vstrbq_scatter_offset_u16(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u16))) void 
vstrbq_scatter_offset(uint8_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void vstrbq_scatter_offset_u32(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u32))) void vstrbq_scatter_offset(uint8_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void vstrbq_scatter_offset_u8(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_scatter_offset_u8))) void vstrbq_scatter_offset(uint8_t *, uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void vstrbq_u16(uint8_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u16))) void vstrbq(uint8_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void vstrbq_u32(uint8_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u32))) void vstrbq(uint8_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void vstrbq_u8(uint8_t *, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrbq_u8))) void vstrbq(uint8_t *, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void vstrdq_scatter_base_p_s64(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_s64))) void vstrdq_scatter_base_p(uint64x2_t, int, int64x2_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void vstrdq_scatter_base_p_u64(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_p_u64))) void vstrdq_scatter_base_p(uint64x2_t, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void vstrdq_scatter_base_s64(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_s64))) void vstrdq_scatter_base(uint64x2_t, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void vstrdq_scatter_base_u64(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_u64))) void vstrdq_scatter_base(uint64x2_t, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void vstrdq_scatter_base_wb_p_s64(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_s64))) void vstrdq_scatter_base_wb_p(uint64x2_t *, int, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void vstrdq_scatter_base_wb_p_u64(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_p_u64))) void vstrdq_scatter_base_wb_p(uint64x2_t *, int, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void vstrdq_scatter_base_wb_s64(uint64x2_t *, int, int64x2_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_s64))) void vstrdq_scatter_base_wb(uint64x2_t *, int, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void vstrdq_scatter_base_wb_u64(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_base_wb_u64))) void vstrdq_scatter_base_wb(uint64x2_t *, int, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void vstrdq_scatter_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_s64))) void vstrdq_scatter_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void vstrdq_scatter_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_p_u64))) void vstrdq_scatter_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void vstrdq_scatter_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_s64))) void vstrdq_scatter_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void vstrdq_scatter_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_offset_u64))) void 
vstrdq_scatter_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void vstrdq_scatter_shifted_offset_p_s64(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_s64))) void vstrdq_scatter_shifted_offset_p(int64_t *, uint64x2_t, int64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void vstrdq_scatter_shifted_offset_p_u64(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_p_u64))) void vstrdq_scatter_shifted_offset_p(uint64_t *, uint64x2_t, uint64x2_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void vstrdq_scatter_shifted_offset_s64(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_s64))) void vstrdq_scatter_shifted_offset(int64_t *, uint64x2_t, int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void vstrdq_scatter_shifted_offset_u64(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrdq_scatter_shifted_offset_u64))) void vstrdq_scatter_shifted_offset(uint64_t *, uint64x2_t, uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void vstrhq_p_s16(int16_t *, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s16))) void vstrhq_p(int16_t *, int16x8_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void vstrhq_p_s32(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_s32))) void vstrhq_p(int16_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void vstrhq_p_u16(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u16))) void vstrhq_p(uint16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void vstrhq_p_u32(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_u32))) void vstrhq_p(uint16_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void vstrhq_s16(int16_t *, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s16))) void vstrhq(int16_t *, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void vstrhq_s32(int16_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_s32))) void vstrhq(int16_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void vstrhq_scatter_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s16))) void vstrhq_scatter_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void vstrhq_scatter_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_s32))) void vstrhq_scatter_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void vstrhq_scatter_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u16))) void vstrhq_scatter_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void vstrhq_scatter_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_u32))) void vstrhq_scatter_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void vstrhq_scatter_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s16))) void vstrhq_scatter_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void vstrhq_scatter_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_s32))) void vstrhq_scatter_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void 
vstrhq_scatter_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u16))) void vstrhq_scatter_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void vstrhq_scatter_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_u32))) void vstrhq_scatter_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void vstrhq_scatter_shifted_offset_p_s16(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s16))) void vstrhq_scatter_shifted_offset_p(int16_t *, uint16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void vstrhq_scatter_shifted_offset_p_s32(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_s32))) void vstrhq_scatter_shifted_offset_p(int16_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void vstrhq_scatter_shifted_offset_p_u16(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u16))) void vstrhq_scatter_shifted_offset_p(uint16_t *, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void vstrhq_scatter_shifted_offset_p_u32(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_u32))) void vstrhq_scatter_shifted_offset_p(uint16_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void vstrhq_scatter_shifted_offset_s16(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s16))) void vstrhq_scatter_shifted_offset(int16_t *, uint16x8_t, int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void vstrhq_scatter_shifted_offset_s32(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_s32))) void vstrhq_scatter_shifted_offset(int16_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void vstrhq_scatter_shifted_offset_u16(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u16))) void vstrhq_scatter_shifted_offset(uint16_t *, uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void vstrhq_scatter_shifted_offset_u32(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_u32))) void vstrhq_scatter_shifted_offset(uint16_t *, uint32x4_t, uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void vstrhq_u16(uint16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u16))) void vstrhq(uint16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void vstrhq_u32(uint16_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_u32))) void vstrhq(uint16_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void vstrwq_p_s32(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_s32))) void vstrwq_p(int32_t *, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void vstrwq_p_u32(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_u32))) void vstrwq_p(uint32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void vstrwq_s32(int32_t *, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_s32))) void vstrwq(int32_t *, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void vstrwq_scatter_base_p_s32(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_s32))) void vstrwq_scatter_base_p(uint32x4_t, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void vstrwq_scatter_base_p_u32(uint32x4_t, int, uint32x4_t, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_u32))) void vstrwq_scatter_base_p(uint32x4_t, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void vstrwq_scatter_base_s32(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_s32))) void vstrwq_scatter_base(uint32x4_t, int, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void vstrwq_scatter_base_u32(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_u32))) void vstrwq_scatter_base(uint32x4_t, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void vstrwq_scatter_base_wb_p_s32(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_s32))) void vstrwq_scatter_base_wb_p(uint32x4_t *, int, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void vstrwq_scatter_base_wb_p_u32(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_u32))) void vstrwq_scatter_base_wb_p(uint32x4_t *, int, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void vstrwq_scatter_base_wb_s32(uint32x4_t *, int, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_s32))) void vstrwq_scatter_base_wb(uint32x4_t *, int, 
int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void vstrwq_scatter_base_wb_u32(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_u32))) void vstrwq_scatter_base_wb(uint32x4_t *, int, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void vstrwq_scatter_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_s32))) void vstrwq_scatter_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void vstrwq_scatter_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_u32))) void vstrwq_scatter_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void vstrwq_scatter_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_s32))) void vstrwq_scatter_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void vstrwq_scatter_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_u32))) void vstrwq_scatter_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) 
void vstrwq_scatter_shifted_offset_p_s32(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_s32))) void vstrwq_scatter_shifted_offset_p(int32_t *, uint32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void vstrwq_scatter_shifted_offset_p_u32(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_u32))) void vstrwq_scatter_shifted_offset_p(uint32_t *, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void vstrwq_scatter_shifted_offset_s32(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_s32))) void vstrwq_scatter_shifted_offset(int32_t *, uint32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void vstrwq_scatter_shifted_offset_u32(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_u32))) void vstrwq_scatter_shifted_offset(uint32_t *, uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void vstrwq_u32(uint32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_u32))) void vstrwq(uint32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t vsubq_m_n_s16(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s16))) int16x8_t vsubq_m(int16x8_t, int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t vsubq_m_n_s32(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s32))) int32x4_t vsubq_m(int32x4_t, int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t vsubq_m_n_s8(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_s8))) int8x16_t vsubq_m(int8x16_t, int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t vsubq_m_n_u16(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u16))) uint16x8_t vsubq_m(uint16x8_t, uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t vsubq_m_n_u32(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u32))) uint32x4_t vsubq_m(uint32x4_t, uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t vsubq_m_n_u8(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_u8))) uint8x16_t vsubq_m(uint8x16_t, uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t vsubq_m_s16(int16x8_t, 
int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s16))) int16x8_t vsubq_m(int16x8_t, int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t vsubq_m_s32(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s32))) int32x4_t vsubq_m(int32x4_t, int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t vsubq_m_s8(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_s8))) int8x16_t vsubq_m(int8x16_t, int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t vsubq_m_u16(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u16))) uint16x8_t vsubq_m(uint16x8_t, uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t vsubq_m_u32(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u32))) uint32x4_t vsubq_m(uint32x4_t, uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t vsubq_m_u8(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_u8))) uint8x16_t vsubq_m(uint8x16_t, uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t vsubq_n_s16(int16x8_t, int16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s16))) int16x8_t vsubq(int16x8_t, int16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t vsubq_n_s32(int32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s32))) int32x4_t vsubq(int32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t vsubq_n_s8(int8x16_t, int8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_s8))) int8x16_t vsubq(int8x16_t, int8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t vsubq_n_u16(uint16x8_t, uint16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u16))) uint16x8_t vsubq(uint16x8_t, uint16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t vsubq_n_u32(uint32x4_t, uint32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u32))) uint32x4_t vsubq(uint32x4_t, uint32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t vsubq_n_u8(uint8x16_t, uint8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_u8))) uint8x16_t vsubq(uint8x16_t, uint8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t vsubq_s16(int16x8_t, int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s16))) int16x8_t vsubq(int16x8_t, int16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t vsubq_s32(int32x4_t, int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s32))) int32x4_t vsubq(int32x4_t, int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t vsubq_s8(int8x16_t, int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_s8))) int8x16_t vsubq(int8x16_t, int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t vsubq_u16(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u16))) uint16x8_t vsubq(uint16x8_t, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t vsubq_u32(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u32))) uint32x4_t vsubq(uint32x4_t, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t vsubq_u8(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_u8))) uint8x16_t vsubq(uint8x16_t, uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t vsubq_x_n_s16(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s16))) int16x8_t vsubq_x(int16x8_t, int16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t vsubq_x_n_s32(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s32))) int32x4_t 
vsubq_x(int32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t vsubq_x_n_s8(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_s8))) int8x16_t vsubq_x(int8x16_t, int8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t vsubq_x_n_u16(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u16))) uint16x8_t vsubq_x(uint16x8_t, uint16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t vsubq_x_n_u32(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u32))) uint32x4_t vsubq_x(uint32x4_t, uint32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t vsubq_x_n_u8(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_u8))) uint8x16_t vsubq_x(uint8x16_t, uint8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t vsubq_x_s16(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s16))) int16x8_t vsubq_x(int16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t vsubq_x_s32(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s32))) int32x4_t vsubq_x(int32x4_t, int32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t vsubq_x_s8(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_s8))) int8x16_t vsubq_x(int8x16_t, int8x16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t vsubq_x_u16(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u16))) uint16x8_t vsubq_x(uint16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t vsubq_x_u32(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u32))) uint32x4_t vsubq_x(uint32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t vsubq_x_u8(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_u8))) uint8x16_t vsubq_x(uint8x16_t, uint8x16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s16))) int16x8_t vuninitializedq(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s32))) int32x4_t vuninitializedq(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s64))) int64x2_t vuninitializedq(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_s8))) int8x16_t vuninitializedq(int8x16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u16))) uint16x8_t vuninitializedq(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u32))) uint32x4_t vuninitializedq(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u64))) uint64x2_t vuninitializedq(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_u8))) uint8x16_t vuninitializedq(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s16))) int16x8_t vuninitializedq_s16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s32))) int32x4_t vuninitializedq_s32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s64))) int64x2_t vuninitializedq_s64(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_s8))) int8x16_t vuninitializedq_s8(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u16))) uint16x8_t vuninitializedq_u16(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u32))) uint32x4_t vuninitializedq_u32(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u64))) uint64x2_t vuninitializedq_u64(); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_u8))) uint8x16_t vuninitializedq_u8(); #endif /* (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) */ #if (__ARM_FEATURE_MVE & 2) && (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t vabdq_f16(float16x8_t, float16x8_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f16))) float16x8_t vabdq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t vabdq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_f32))) float32x4_t vabdq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t vabdq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f16))) float16x8_t vabdq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t vabdq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_m_f32))) float32x4_t vabdq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t vabdq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f16))) float16x8_t vabdq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t vabdq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabdq_x_f32))) float32x4_t vabdq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t vabsq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f16))) float16x8_t vabsq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t vabsq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_f32))) float32x4_t vabsq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t vabsq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f16))) float16x8_t vabsq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t vabsq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_m_f32))) float32x4_t vabsq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t vabsq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f16))) float16x8_t vabsq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t vabsq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vabsq_x_f32))) float32x4_t vabsq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t vaddq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f16))) float16x8_t vaddq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) 
float32x4_t vaddq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_f32))) float32x4_t vaddq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t vaddq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f16))) float16x8_t vaddq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t vaddq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_f32))) float32x4_t vaddq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t vaddq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f16))) float16x8_t vaddq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t vaddq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_m_n_f32))) float32x4_t vaddq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t vaddq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f16))) float16x8_t vaddq(float16x8_t, float16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t vaddq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_n_f32))) float32x4_t vaddq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t vaddq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f16))) float16x8_t vaddq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t vaddq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_f32))) float32x4_t vaddq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t vaddq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f16))) float16x8_t vaddq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t vaddq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vaddq_x_n_f32))) float32x4_t vaddq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t vandq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f16))) float16x8_t vandq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t 
vandq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_f32))) float32x4_t vandq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t vandq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f16))) float16x8_t vandq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t vandq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_m_f32))) float32x4_t vandq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t vandq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f16))) float16x8_t vandq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t vandq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vandq_x_f32))) float32x4_t vandq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t vbicq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f16))) float16x8_t vbicq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t vbicq_f32(float32x4_t, float32x4_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_f32))) float32x4_t vbicq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t vbicq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f16))) float16x8_t vbicq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t vbicq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_m_f32))) float32x4_t vbicq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t vbicq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f16))) float16x8_t vbicq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t vbicq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbicq_x_f32))) float32x4_t vbicq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t vbrsrq_m_n_f16(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f16))) float16x8_t vbrsrq_m(float16x8_t, float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t 
vbrsrq_m_n_f32(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_m_n_f32))) float32x4_t vbrsrq_m(float32x4_t, float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t vbrsrq_n_f16(float16x8_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f16))) float16x8_t vbrsrq(float16x8_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t vbrsrq_n_f32(float32x4_t, int32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_n_f32))) float32x4_t vbrsrq(float32x4_t, int32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t vbrsrq_x_n_f16(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f16))) float16x8_t vbrsrq_x(float16x8_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t vbrsrq_x_n_f32(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vbrsrq_x_n_f32))) float32x4_t vbrsrq_x(float32x4_t, int32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t vcaddq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f16))) float16x8_t vcaddq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t vcaddq_rot270_f32(float32x4_t, float32x4_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_f32))) float32x4_t vcaddq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t vcaddq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f16))) float16x8_t vcaddq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t vcaddq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_m_f32))) float32x4_t vcaddq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t vcaddq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f16))) float16x8_t vcaddq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t vcaddq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot270_x_f32))) float32x4_t vcaddq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t vcaddq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f16))) float16x8_t vcaddq_rot90(float16x8_t, float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t vcaddq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_f32))) float32x4_t vcaddq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t vcaddq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f16))) float16x8_t vcaddq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t vcaddq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_m_f32))) float32x4_t vcaddq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t vcaddq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f16))) float16x8_t vcaddq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t vcaddq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcaddq_rot90_x_f32))) float32x4_t vcaddq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t vcmlaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f16))) float16x8_t vcmlaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t vcmlaq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_f32))) float32x4_t vcmlaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t vcmlaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f16))) float16x8_t vcmlaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t vcmlaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_m_f32))) float32x4_t vcmlaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t vcmlaq_rot180_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f16))) float16x8_t vcmlaq_rot180(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t vcmlaq_rot180_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_f32))) float32x4_t vcmlaq_rot180(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t vcmlaq_rot180_m_f16(float16x8_t, 
float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f16))) float16x8_t vcmlaq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t vcmlaq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot180_m_f32))) float32x4_t vcmlaq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t vcmlaq_rot270_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f16))) float16x8_t vcmlaq_rot270(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t vcmlaq_rot270_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_f32))) float32x4_t vcmlaq_rot270(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t vcmlaq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f16))) float16x8_t vcmlaq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t vcmlaq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot270_m_f32))) float32x4_t vcmlaq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t vcmlaq_rot90_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f16))) float16x8_t vcmlaq_rot90(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t vcmlaq_rot90_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_f32))) float32x4_t vcmlaq_rot90(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t vcmlaq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f16))) float16x8_t vcmlaq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t vcmlaq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmlaq_rot90_m_f32))) float32x4_t vcmlaq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t vcmpeqq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f16))) mve_pred16_t vcmpeqq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) 
mve_pred16_t vcmpeqq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_f32))) mve_pred16_t vcmpeqq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t vcmpeqq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f16))) mve_pred16_t vcmpeqq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t vcmpeqq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_f32))) mve_pred16_t vcmpeqq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t vcmpeqq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f16))) mve_pred16_t vcmpeqq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t vcmpeqq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_m_n_f32))) mve_pred16_t vcmpeqq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t vcmpeqq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f16))) mve_pred16_t vcmpeqq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t 
vcmpeqq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpeqq_n_f32))) mve_pred16_t vcmpeqq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t vcmpgeq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f16))) mve_pred16_t vcmpgeq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t vcmpgeq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_f32))) mve_pred16_t vcmpgeq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t vcmpgeq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f16))) mve_pred16_t vcmpgeq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t vcmpgeq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_f32))) mve_pred16_t vcmpgeq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t vcmpgeq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f16))) mve_pred16_t vcmpgeq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t vcmpgeq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_m_n_f32))) mve_pred16_t vcmpgeq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t vcmpgeq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f16))) mve_pred16_t vcmpgeq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t vcmpgeq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgeq_n_f32))) mve_pred16_t vcmpgeq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t vcmpgtq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f16))) mve_pred16_t vcmpgtq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t vcmpgtq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_f32))) mve_pred16_t vcmpgtq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t vcmpgtq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f16))) mve_pred16_t vcmpgtq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t vcmpgtq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_f32))) mve_pred16_t 
vcmpgtq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t vcmpgtq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f16))) mve_pred16_t vcmpgtq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t vcmpgtq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_m_n_f32))) mve_pred16_t vcmpgtq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t vcmpgtq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f16))) mve_pred16_t vcmpgtq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t vcmpgtq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpgtq_n_f32))) mve_pred16_t vcmpgtq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t vcmpleq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f16))) mve_pred16_t vcmpleq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t vcmpleq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_f32))) mve_pred16_t vcmpleq(float32x4_t, float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t vcmpleq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f16))) mve_pred16_t vcmpleq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t vcmpleq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_f32))) mve_pred16_t vcmpleq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t vcmpleq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f16))) mve_pred16_t vcmpleq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t vcmpleq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_m_n_f32))) mve_pred16_t vcmpleq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t vcmpleq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f16))) mve_pred16_t vcmpleq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t vcmpleq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpleq_n_f32))) mve_pred16_t vcmpleq(float32x4_t, float32_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t vcmpltq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f16))) mve_pred16_t vcmpltq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t vcmpltq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_f32))) mve_pred16_t vcmpltq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t vcmpltq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f16))) mve_pred16_t vcmpltq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t vcmpltq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_f32))) mve_pred16_t vcmpltq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t vcmpltq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f16))) mve_pred16_t vcmpltq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t vcmpltq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_m_n_f32))) mve_pred16_t vcmpltq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t vcmpltq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f16))) mve_pred16_t vcmpltq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t vcmpltq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpltq_n_f32))) mve_pred16_t vcmpltq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t vcmpneq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f16))) mve_pred16_t vcmpneq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t vcmpneq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_f32))) mve_pred16_t vcmpneq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t vcmpneq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f16))) mve_pred16_t vcmpneq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t vcmpneq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_f32))) mve_pred16_t vcmpneq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t 
vcmpneq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f16))) mve_pred16_t vcmpneq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t vcmpneq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_m_n_f32))) mve_pred16_t vcmpneq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t vcmpneq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f16))) mve_pred16_t vcmpneq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t vcmpneq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmpneq_n_f32))) mve_pred16_t vcmpneq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t vcmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f16))) float16x8_t vcmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t vcmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_f32))) float32x4_t vcmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t vcmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f16))) float16x8_t vcmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t vcmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_m_f32))) float32x4_t vcmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t vcmulq_rot180_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f16))) float16x8_t vcmulq_rot180(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t vcmulq_rot180_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_f32))) float32x4_t vcmulq_rot180(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t vcmulq_rot180_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f16))) float16x8_t vcmulq_rot180_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t vcmulq_rot180_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_m_f32))) float32x4_t vcmulq_rot180_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t vcmulq_rot180_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f16))) float16x8_t vcmulq_rot180_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t vcmulq_rot180_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot180_x_f32))) float32x4_t vcmulq_rot180_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t vcmulq_rot270_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f16))) float16x8_t vcmulq_rot270(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t vcmulq_rot270_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_f32))) float32x4_t vcmulq_rot270(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t vcmulq_rot270_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f16))) float16x8_t vcmulq_rot270_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t vcmulq_rot270_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_m_f32))) float32x4_t vcmulq_rot270_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t vcmulq_rot270_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f16))) float16x8_t vcmulq_rot270_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t vcmulq_rot270_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot270_x_f32))) float32x4_t vcmulq_rot270_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t vcmulq_rot90_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f16))) float16x8_t vcmulq_rot90(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t vcmulq_rot90_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_f32))) float32x4_t vcmulq_rot90(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t vcmulq_rot90_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f16))) float16x8_t vcmulq_rot90_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) 
float32x4_t vcmulq_rot90_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_m_f32))) float32x4_t vcmulq_rot90_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t vcmulq_rot90_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f16))) float16x8_t vcmulq_rot90_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t vcmulq_rot90_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_rot90_x_f32))) float32x4_t vcmulq_rot90_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t vcmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f16))) float16x8_t vcmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t vcmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcmulq_x_f32))) float32x4_t vcmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f16))) float16x8_t vcreateq_f16(uint64_t, uint64_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcreateq_f32))) float32x4_t vcreateq_f32(uint64_t, uint64_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t vcvtaq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s16_f16))) int16x8_t vcvtaq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t vcvtaq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_s32_f32))) int32x4_t vcvtaq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t vcvtaq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u16_f16))) uint16x8_t vcvtaq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t vcvtaq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_m_u32_f32))) uint32x4_t vcvtaq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s16_f16))) int16x8_t vcvtaq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_s32_f32))) int32x4_t vcvtaq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u16_f16))) uint16x8_t vcvtaq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_u32_f32))) uint32x4_t vcvtaq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s16_f16))) int16x8_t 
vcvtaq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_s32_f32))) int32x4_t vcvtaq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u16_f16))) uint16x8_t vcvtaq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtaq_x_u32_f32))) uint32x4_t vcvtaq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f16_f32))) float16x8_t vcvtbq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_f32_f16))) float32x4_t vcvtbq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f16_f32))) float16x8_t vcvtbq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_m_f32_f16))) float32x4_t vcvtbq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtbq_x_f32_f16))) float32x4_t vcvtbq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t vcvtmq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s16_f16))) int16x8_t vcvtmq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t vcvtmq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_s32_f32))) int32x4_t vcvtmq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t vcvtmq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u16_f16))) uint16x8_t vcvtmq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t vcvtmq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_m_u32_f32))) uint32x4_t vcvtmq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s16_f16))) int16x8_t vcvtmq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_s32_f32))) int32x4_t vcvtmq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u16_f16))) uint16x8_t vcvtmq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_u32_f32))) uint32x4_t vcvtmq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s16_f16))) int16x8_t vcvtmq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_s32_f32))) int32x4_t vcvtmq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u16_f16))) uint16x8_t vcvtmq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtmq_x_u32_f32))) uint32x4_t vcvtmq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t vcvtnq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s16_f16))) int16x8_t vcvtnq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t vcvtnq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_s32_f32))) int32x4_t vcvtnq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t vcvtnq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u16_f16))) uint16x8_t vcvtnq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t vcvtnq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_m_u32_f32))) uint32x4_t vcvtnq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s16_f16))) int16x8_t vcvtnq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_s32_f32))) int32x4_t vcvtnq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u16_f16))) uint16x8_t vcvtnq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_u32_f32))) uint32x4_t vcvtnq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s16_f16))) int16x8_t vcvtnq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_s32_f32))) int32x4_t vcvtnq_x_s32_f32(float32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u16_f16))) uint16x8_t vcvtnq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtnq_x_u32_f32))) uint32x4_t vcvtnq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t vcvtpq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s16_f16))) int16x8_t vcvtpq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t vcvtpq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_s32_f32))) int32x4_t vcvtpq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t vcvtpq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u16_f16))) uint16x8_t vcvtpq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t vcvtpq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_m_u32_f32))) uint32x4_t vcvtpq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s16_f16))) int16x8_t vcvtpq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_s32_f32))) int32x4_t vcvtpq_s32_f32(float32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u16_f16))) uint16x8_t vcvtpq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_u32_f32))) uint32x4_t vcvtpq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s16_f16))) int16x8_t vcvtpq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_s32_f32))) int32x4_t vcvtpq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u16_f16))) uint16x8_t vcvtpq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtpq_x_u32_f32))) uint32x4_t vcvtpq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t vcvtq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_s16))) float16x8_t vcvtq(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t vcvtq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f16_u16))) float16x8_t vcvtq(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t vcvtq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_s32))) float32x4_t vcvtq(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t vcvtq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_f32_u32))) float32x4_t vcvtq(uint32x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t vcvtq_m_f16_s16(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_s16))) float16x8_t vcvtq_m(float16x8_t, int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t vcvtq_m_f16_u16(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f16_u16))) float16x8_t vcvtq_m(float16x8_t, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t vcvtq_m_f32_s32(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_s32))) float32x4_t vcvtq_m(float32x4_t, int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t vcvtq_m_f32_u32(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_f32_u32))) float32x4_t vcvtq_m(float32x4_t, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t vcvtq_m_n_f16_s16(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_s16))) float16x8_t vcvtq_m_n(float16x8_t, int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t vcvtq_m_n_f16_u16(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f16_u16))) float16x8_t 
vcvtq_m_n(float16x8_t, uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t vcvtq_m_n_f32_s32(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_s32))) float32x4_t vcvtq_m_n(float32x4_t, int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t vcvtq_m_n_f32_u32(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_f32_u32))) float32x4_t vcvtq_m_n(float32x4_t, uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t vcvtq_m_n_s16_f16(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s16_f16))) int16x8_t vcvtq_m_n(int16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t vcvtq_m_n_s32_f32(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_s32_f32))) int32x4_t vcvtq_m_n(int32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t vcvtq_m_n_u16_f16(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u16_f16))) uint16x8_t vcvtq_m_n(uint16x8_t, float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t vcvtq_m_n_u32_f32(uint32x4_t, float32x4_t, int, mve_pred16_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_n_u32_f32))) uint32x4_t vcvtq_m_n(uint32x4_t, float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t vcvtq_m_s16_f16(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s16_f16))) int16x8_t vcvtq_m(int16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t vcvtq_m_s32_f32(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_s32_f32))) int32x4_t vcvtq_m(int32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t vcvtq_m_u16_f16(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u16_f16))) uint16x8_t vcvtq_m(uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t vcvtq_m_u32_f32(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_m_u32_f32))) uint32x4_t vcvtq_m(uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t vcvtq_n_f16_s16(int16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_s16))) float16x8_t vcvtq_n(int16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t vcvtq_n_f16_u16(uint16x8_t, int); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f16_u16))) float16x8_t vcvtq_n(uint16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t vcvtq_n_f32_s32(int32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_s32))) float32x4_t vcvtq_n(int32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t vcvtq_n_f32_u32(uint32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_f32_u32))) float32x4_t vcvtq_n(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s16_f16))) int16x8_t vcvtq_n_s16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_s32_f32))) int32x4_t vcvtq_n_s32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u16_f16))) uint16x8_t vcvtq_n_u16_f16(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_n_u32_f32))) uint32x4_t vcvtq_n_u32_f32(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s16_f16))) int16x8_t vcvtq_s16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_s32_f32))) int32x4_t vcvtq_s32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u16_f16))) uint16x8_t vcvtq_u16_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_u32_f32))) uint32x4_t vcvtq_u32_f32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t vcvtq_x_f16_s16(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_s16))) float16x8_t vcvtq_x(int16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t vcvtq_x_f16_u16(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f16_u16))) float16x8_t vcvtq_x(uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t vcvtq_x_f32_s32(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_s32))) float32x4_t vcvtq_x(int32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t vcvtq_x_f32_u32(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_f32_u32))) float32x4_t vcvtq_x(uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t vcvtq_x_n_f16_s16(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_s16))) float16x8_t vcvtq_x_n(int16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t vcvtq_x_n_f16_u16(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f16_u16))) float16x8_t vcvtq_x_n(uint16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t vcvtq_x_n_f32_s32(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_s32))) float32x4_t 
vcvtq_x_n(int32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t vcvtq_x_n_f32_u32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_f32_u32))) float32x4_t vcvtq_x_n(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s16_f16))) int16x8_t vcvtq_x_n_s16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_s32_f32))) int32x4_t vcvtq_x_n_s32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u16_f16))) uint16x8_t vcvtq_x_n_u16_f16(float16x8_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_n_u32_f32))) uint32x4_t vcvtq_x_n_u32_f32(float32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s16_f16))) int16x8_t vcvtq_x_s16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_s32_f32))) int32x4_t vcvtq_x_s32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u16_f16))) uint16x8_t vcvtq_x_u16_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvtq_x_u32_f32))) uint32x4_t vcvtq_x_u32_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f16_f32))) float16x8_t vcvttq_f16_f32(float16x8_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_f32_f16))) float32x4_t vcvttq_f32_f16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f16_f32))) 
float16x8_t vcvttq_m_f16_f32(float16x8_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_m_f32_f16))) float32x4_t vcvttq_m_f32_f16(float32x4_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vcvttq_x_f32_f16))) float32x4_t vcvttq_x_f32_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t vdupq_m_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f16))) float16x8_t vdupq_m(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t vdupq_m_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vdupq_m_n_f32))) float32x4_t vdupq_m(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f16))) float16x8_t vdupq_n_f16(float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_n_f32))) float32x4_t vdupq_n_f32(float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f16))) float16x8_t vdupq_x_n_f16(float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vdupq_x_n_f32))) float32x4_t vdupq_x_n_f32(float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t veorq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f16))) float16x8_t veorq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t 
veorq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_f32))) float32x4_t veorq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t veorq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f16))) float16x8_t veorq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t veorq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_m_f32))) float32x4_t veorq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t veorq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f16))) float16x8_t veorq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t veorq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_veorq_x_f32))) float32x4_t veorq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t vfmaq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f16))) float16x8_t vfmaq(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t vfmaq_f32(float32x4_t, 
float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_f32))) float32x4_t vfmaq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t vfmaq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f16))) float16x8_t vfmaq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t vfmaq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_f32))) float32x4_t vfmaq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t vfmaq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f16))) float16x8_t vfmaq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t vfmaq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_m_n_f32))) float32x4_t vfmaq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t vfmaq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f16))) float16x8_t vfmaq(float16x8_t, float16x8_t, float16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t vfmaq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmaq_n_f32))) float32x4_t vfmaq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t vfmasq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f16))) float16x8_t vfmasq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t vfmasq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_m_n_f32))) float32x4_t vfmasq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t vfmasq_n_f16(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f16))) float16x8_t vfmasq(float16x8_t, float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t vfmasq_n_f32(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmasq_n_f32))) float32x4_t vfmasq(float32x4_t, float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t vfmsq_f16(float16x8_t, float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f16))) float16x8_t vfmsq(float16x8_t, float16x8_t, float16x8_t); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t vfmsq_f32(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_f32))) float32x4_t vfmsq(float32x4_t, float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t vfmsq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f16))) float16x8_t vfmsq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t vfmsq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vfmsq_m_f32))) float32x4_t vfmsq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t vgetq_lane_f16(float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f16))) float16_t vgetq_lane(float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t vgetq_lane_f32(float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vgetq_lane_f32))) float32_t vgetq_lane(float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t vld1q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f16))) float16x8_t vld1q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t vld1q_f32(const 
float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_f32))) float32x4_t vld1q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t vld1q_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f16))) float16x8_t vld1q_z(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t vld1q_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld1q_z_f32))) float32x4_t vld1q_z(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t vld2q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f16))) float16x8x2_t vld2q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t vld2q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld2q_f32))) float32x4x2_t vld2q(const float32_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t vld4q_f16(const float16_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f16))) float16x8x4_t vld4q(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t vld4q_f32(const float32_t *); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vld4q_f32))) float32x4x4_t vld4q(const float32_t *); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_f16))) float16x8_t vldrhq_f16(const float16_t *); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t vldrhq_gather_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_f16))) float16x8_t vldrhq_gather_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t vldrhq_gather_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_offset_z_f16))) float16x8_t vldrhq_gather_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t vldrhq_gather_shifted_offset_f16(const float16_t *, uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_f16))) float16x8_t vldrhq_gather_shifted_offset(const float16_t *, uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t vldrhq_gather_shifted_offset_z_f16(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_gather_shifted_offset_z_f16))) float16x8_t vldrhq_gather_shifted_offset_z(const float16_t *, uint16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrhq_z_f16))) float16x8_t vldrhq_z_f16(const float16_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_f32))) float32x4_t vldrwq_f32(const float32_t *); static 
__inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_f32))) float32x4_t vldrwq_gather_base_f32(uint32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_f32))) float32x4_t vldrwq_gather_base_wb_f32(uint32x4_t *, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_wb_z_f32))) float32x4_t vldrwq_gather_base_wb_z_f32(uint32x4_t *, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_base_z_f32))) float32x4_t vldrwq_gather_base_z_f32(uint32x4_t, int, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t vldrwq_gather_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_f32))) float32x4_t vldrwq_gather_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t vldrwq_gather_offset_z_f32(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_offset_z_f32))) float32x4_t vldrwq_gather_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t vldrwq_gather_shifted_offset_f32(const float32_t *, uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_f32))) float32x4_t vldrwq_gather_shifted_offset(const float32_t *, uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t vldrwq_gather_shifted_offset_z_f32(const 
float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_gather_shifted_offset_z_f32))) float32x4_t vldrwq_gather_shifted_offset_z(const float32_t *, uint32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vldrwq_z_f32))) float32x4_t vldrwq_z_f32(const float32_t *, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t vmaxnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f16))) float16x8_t vmaxnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t vmaxnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_f32))) float32x4_t vmaxnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t vmaxnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f16))) float16x8_t vmaxnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t vmaxnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmaq_m_f32))) float32x4_t vmaxnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t vmaxnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f16))) float16_t vmaxnmavq(float16_t, 
float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t vmaxnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_f32))) float32_t vmaxnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t vmaxnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f16))) float16_t vmaxnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t vmaxnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmavq_p_f32))) float32_t vmaxnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t vmaxnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f16))) float16x8_t vmaxnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t vmaxnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_f32))) float32x4_t vmaxnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t vmaxnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f16))) float16x8_t vmaxnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t vmaxnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_m_f32))) float32x4_t vmaxnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t vmaxnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f16))) float16x8_t vmaxnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t vmaxnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmq_x_f32))) float32x4_t vmaxnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t vmaxnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f16))) float16_t vmaxnmvq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t vmaxnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_f32))) float32_t vmaxnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t vmaxnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f16))) float16_t vmaxnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t vmaxnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmaxnmvq_p_f32))) float32_t vmaxnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t vminnmaq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f16))) float16x8_t vminnmaq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t vminnmaq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_f32))) float32x4_t vminnmaq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t vminnmaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f16))) float16x8_t vminnmaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t vminnmaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmaq_m_f32))) float32x4_t vminnmaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t vminnmavq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f16))) float16_t vminnmavq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) 
float32_t vminnmavq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_f32))) float32_t vminnmavq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t vminnmavq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f16))) float16_t vminnmavq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t vminnmavq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmavq_p_f32))) float32_t vminnmavq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t vminnmq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f16))) float16x8_t vminnmq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t vminnmq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_f32))) float32x4_t vminnmq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t vminnmq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f16))) float16x8_t vminnmq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t vminnmq_m_f32(float32x4_t, float32x4_t, float32x4_t, 
mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_m_f32))) float32x4_t vminnmq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t vminnmq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f16))) float16x8_t vminnmq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t vminnmq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmq_x_f32))) float32x4_t vminnmq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t vminnmvq_f16(float16_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f16))) float16_t vminnmvq(float16_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t vminnmvq_f32(float32_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_f32))) float32_t vminnmvq(float32_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t vminnmvq_p_f16(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f16))) float16_t vminnmvq_p(float16_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t vminnmvq_p_f32(float32_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vminnmvq_p_f32))) float32_t vminnmvq_p(float32_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t vmulq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f16))) float16x8_t vmulq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t vmulq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_f32))) float32x4_t vmulq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t vmulq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f16))) float16x8_t vmulq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t vmulq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_f32))) float32x4_t vmulq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t vmulq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f16))) float16x8_t vmulq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t vmulq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_m_n_f32))) float32x4_t vmulq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t vmulq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f16))) float16x8_t vmulq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t vmulq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_n_f32))) float32x4_t vmulq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t vmulq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f16))) float16x8_t vmulq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t vmulq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_f32))) float32x4_t vmulq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t vmulq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f16))) float16x8_t vmulq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t vmulq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vmulq_x_n_f32))) float32x4_t vmulq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t vnegq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f16))) float16x8_t vnegq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t vnegq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_f32))) float32x4_t vnegq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t vnegq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f16))) float16x8_t vnegq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t vnegq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_m_f32))) float32x4_t vnegq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t vnegq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f16))) float16x8_t vnegq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t vnegq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vnegq_x_f32))) float32x4_t vnegq_x(float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t vornq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f16))) float16x8_t vornq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t vornq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_f32))) float32x4_t vornq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t vornq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f16))) float16x8_t vornq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t vornq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_m_f32))) float32x4_t vornq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t vornq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f16))) float16x8_t vornq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t vornq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vornq_x_f32))) float32x4_t vornq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t vorrq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f16))) float16x8_t vorrq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t vorrq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_f32))) float32x4_t vorrq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t vorrq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f16))) float16x8_t vorrq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t vorrq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_m_f32))) float32x4_t vorrq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t vorrq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f16))) float16x8_t vorrq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t vorrq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vorrq_x_f32))) float32x4_t vorrq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t vpselq_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f16))) float16x8_t vpselq(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t vpselq_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vpselq_f32))) float32x4_t vpselq(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t vreinterpretq_f16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_f32))) float16x8_t vreinterpretq_f16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t vreinterpretq_f16_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s16))) float16x8_t vreinterpretq_f16(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t vreinterpretq_f16_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s32))) float16x8_t vreinterpretq_f16(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t vreinterpretq_f16_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s64))) float16x8_t vreinterpretq_f16(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t 
vreinterpretq_f16_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_s8))) float16x8_t vreinterpretq_f16(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t vreinterpretq_f16_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u16))) float16x8_t vreinterpretq_f16(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t vreinterpretq_f16_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u32))) float16x8_t vreinterpretq_f16(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t vreinterpretq_f16_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u64))) float16x8_t vreinterpretq_f16(uint64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t vreinterpretq_f16_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f16_u8))) float16x8_t vreinterpretq_f16(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t vreinterpretq_f32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_f16))) float32x4_t vreinterpretq_f32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t vreinterpretq_f32_s16(int16x8_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s16))) float32x4_t vreinterpretq_f32(int16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t vreinterpretq_f32_s32(int32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s32))) float32x4_t vreinterpretq_f32(int32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t vreinterpretq_f32_s64(int64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s64))) float32x4_t vreinterpretq_f32(int64x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t vreinterpretq_f32_s8(int8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_s8))) float32x4_t vreinterpretq_f32(int8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t vreinterpretq_f32_u16(uint16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u16))) float32x4_t vreinterpretq_f32(uint16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t vreinterpretq_f32_u32(uint32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u32))) float32x4_t vreinterpretq_f32(uint32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t vreinterpretq_f32_u64(uint64x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u64))) float32x4_t vreinterpretq_f32(uint64x2_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t vreinterpretq_f32_u8(uint8x16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_f32_u8))) float32x4_t vreinterpretq_f32(uint8x16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t vreinterpretq_s16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f16))) int16x8_t vreinterpretq_s16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t vreinterpretq_s16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s16_f32))) int16x8_t vreinterpretq_s16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t vreinterpretq_s32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f16))) int32x4_t vreinterpretq_s32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t vreinterpretq_s32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s32_f32))) int32x4_t vreinterpretq_s32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t vreinterpretq_s64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f16))) int64x2_t vreinterpretq_s64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t vreinterpretq_s64_f32(float32x4_t); static 
__inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s64_f32))) int64x2_t vreinterpretq_s64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t vreinterpretq_s8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f16))) int8x16_t vreinterpretq_s8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t vreinterpretq_s8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_s8_f32))) int8x16_t vreinterpretq_s8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t vreinterpretq_u16_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f16))) uint16x8_t vreinterpretq_u16(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t vreinterpretq_u16_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u16_f32))) uint16x8_t vreinterpretq_u16(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t vreinterpretq_u32_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f16))) uint32x4_t vreinterpretq_u32(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t vreinterpretq_u32_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u32_f32))) uint32x4_t 
vreinterpretq_u32(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t vreinterpretq_u64_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f16))) uint64x2_t vreinterpretq_u64(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t vreinterpretq_u64_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u64_f32))) uint64x2_t vreinterpretq_u64(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t vreinterpretq_u8_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f16))) uint8x16_t vreinterpretq_u8(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t vreinterpretq_u8_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vreinterpretq_u8_f32))) uint8x16_t vreinterpretq_u8(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t vrev32q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_f16))) float16x8_t vrev32q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t vrev32q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_m_f16))) float16x8_t vrev32q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t 
vrev32q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev32q_x_f16))) float16x8_t vrev32q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t vrev64q_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f16))) float16x8_t vrev64q(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t vrev64q_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_f32))) float32x4_t vrev64q(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t vrev64q_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f16))) float16x8_t vrev64q_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t vrev64q_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_m_f32))) float32x4_t vrev64q_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t vrev64q_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f16))) float16x8_t vrev64q_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) float32x4_t vrev64q_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrev64q_x_f32))) 
float32x4_t vrev64q_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t vrndaq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f16))) float16x8_t vrndaq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t vrndaq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_f32))) float32x4_t vrndaq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t vrndaq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f16))) float16x8_t vrndaq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t vrndaq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_m_f32))) float32x4_t vrndaq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t vrndaq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f16))) float16x8_t vrndaq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t vrndaq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndaq_x_f32))) float32x4_t vrndaq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t vrndmq_f16(float16x8_t); 
static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f16))) float16x8_t vrndmq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t vrndmq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_f32))) float32x4_t vrndmq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t vrndmq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f16))) float16x8_t vrndmq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t vrndmq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_m_f32))) float32x4_t vrndmq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t vrndmq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f16))) float16x8_t vrndmq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t vrndmq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndmq_x_f32))) float32x4_t vrndmq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t vrndnq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f16))) float16x8_t vrndnq(float16x8_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t vrndnq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_f32))) float32x4_t vrndnq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t vrndnq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f16))) float16x8_t vrndnq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t vrndnq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_m_f32))) float32x4_t vrndnq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t vrndnq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f16))) float16x8_t vrndnq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t vrndnq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndnq_x_f32))) float32x4_t vrndnq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t vrndpq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f16))) float16x8_t vrndpq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t vrndpq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, 
__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_f32))) float32x4_t vrndpq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t vrndpq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f16))) float16x8_t vrndpq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t vrndpq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_m_f32))) float32x4_t vrndpq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t vrndpq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f16))) float16x8_t vrndpq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t vrndpq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndpq_x_f32))) float32x4_t vrndpq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t vrndq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f16))) float16x8_t vrndq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t vrndq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_f32))) float32x4_t vrndq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) 
float16x8_t vrndq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f16))) float16x8_t vrndq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t vrndq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_m_f32))) float32x4_t vrndq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t vrndq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f16))) float16x8_t vrndq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t vrndq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndq_x_f32))) float32x4_t vrndq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t vrndxq_f16(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f16))) float16x8_t vrndxq(float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t vrndxq_f32(float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_f32))) float32x4_t vrndxq(float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t vrndxq_m_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f16))) float16x8_t 
vrndxq_m(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t vrndxq_m_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_m_f32))) float32x4_t vrndxq_m(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t vrndxq_x_f16(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f16))) float16x8_t vrndxq_x(float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t vrndxq_x_f32(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vrndxq_x_f32))) float32x4_t vrndxq_x(float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t vsetq_lane_f16(float16_t, float16x8_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f16))) float16x8_t vsetq_lane(float16_t, float16x8_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t vsetq_lane_f32(float32_t, float32x4_t, int); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsetq_lane_f32))) float32x4_t vsetq_lane(float32_t, float32x4_t, int); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void vst1q_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f16))) void vst1q(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) 
void vst1q_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_f32))) void vst1q(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void vst1q_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f16))) void vst1q_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void vst1q_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst1q_p_f32))) void vst1q_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void vst2q_f16(float16_t *, float16x8x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f16))) void vst2q(float16_t *, float16x8x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void vst2q_f32(float32_t *, float32x4x2_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst2q_f32))) void vst2q(float32_t *, float32x4x2_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void vst4q_f16(float16_t *, float16x8x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f16))) void vst4q(float16_t *, float16x8x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void vst4q_f32(float32_t *, float32x4x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vst4q_f32))) void vst4q(float32_t *, float32x4x4_t); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void vstrhq_f16(float16_t *, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_f16))) void vstrhq(float16_t *, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void vstrhq_p_f16(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_p_f16))) void vstrhq_p(float16_t *, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void vstrhq_scatter_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_f16))) void vstrhq_scatter_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void vstrhq_scatter_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_offset_p_f16))) void vstrhq_scatter_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void vstrhq_scatter_shifted_offset_f16(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_f16))) void vstrhq_scatter_shifted_offset(float16_t *, uint16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void vstrhq_scatter_shifted_offset_p_f16(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ 
__attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrhq_scatter_shifted_offset_p_f16))) void vstrhq_scatter_shifted_offset_p(float16_t *, uint16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void vstrwq_f32(float32_t *, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_f32))) void vstrwq(float32_t *, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void vstrwq_p_f32(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_p_f32))) void vstrwq_p(float32_t *, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void vstrwq_scatter_base_f32(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_f32))) void vstrwq_scatter_base(uint32x4_t, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void vstrwq_scatter_base_p_f32(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_p_f32))) void vstrwq_scatter_base_p(uint32x4_t, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void vstrwq_scatter_base_wb_f32(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_f32))) void vstrwq_scatter_base_wb(uint32x4_t *, int, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void 
vstrwq_scatter_base_wb_p_f32(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_base_wb_p_f32))) void vstrwq_scatter_base_wb_p(uint32x4_t *, int, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void vstrwq_scatter_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_f32))) void vstrwq_scatter_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void vstrwq_scatter_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_offset_p_f32))) void vstrwq_scatter_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void vstrwq_scatter_shifted_offset_f32(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_f32))) void vstrwq_scatter_shifted_offset(float32_t *, uint32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void vstrwq_scatter_shifted_offset_p_f32(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vstrwq_scatter_shifted_offset_p_f32))) void vstrwq_scatter_shifted_offset_p(float32_t *, uint32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t 
vsubq_f16(float16x8_t, float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f16))) float16x8_t vsubq(float16x8_t, float16x8_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t vsubq_f32(float32x4_t, float32x4_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_f32))) float32x4_t vsubq(float32x4_t, float32x4_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t vsubq_m_f16(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f16))) float16x8_t vsubq_m(float16x8_t, float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t vsubq_m_f32(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_f32))) float32x4_t vsubq_m(float32x4_t, float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t vsubq_m_n_f16(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f16))) float16x8_t vsubq_m(float16x8_t, float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t vsubq_m_n_f32(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_m_n_f32))) float32x4_t vsubq_m(float32x4_t, float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) 
float16x8_t vsubq_n_f16(float16x8_t, float16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f16))) float16x8_t vsubq(float16x8_t, float16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t vsubq_n_f32(float32x4_t, float32_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_n_f32))) float32x4_t vsubq(float32x4_t, float32_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t vsubq_x_f16(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f16))) float16x8_t vsubq_x(float16x8_t, float16x8_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t vsubq_x_f32(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_f32))) float32x4_t vsubq_x(float32x4_t, float32x4_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t vsubq_x_n_f16(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f16))) float16x8_t vsubq_x(float16x8_t, float16_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t vsubq_x_n_f32(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vsubq_x_n_f32))) float32x4_t vsubq_x(float32x4_t, float32_t, mve_pred16_t); static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f16))) float16x8_t vuninitializedq_f16(); static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_f32))) float32x4_t vuninitializedq_f32(); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f16))) float16x8_t vuninitializedq(float16x8_t); static __inline__ __attribute__((__overloadable__, __clang_arm_builtin_alias(__builtin_arm_mve_vuninitializedq_polymorphic_f32))) float32x4_t vuninitializedq(float32x4_t); #endif /* (__ARM_FEATURE_MVE & 2) && (!defined __ARM_MVE_PRESERVE_USER_NAMESPACE) */ #ifdef __cplusplus } /* extern "C" */ #endif #endif /* __ARM_MVE_H */ /builtins/arm_neon_sve_bridge.h/*===---- arm_neon_sve_bridge.h - ARM NEON SVE Bridge intrinsics -----------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_NEON_SVE_BRIDGE_H #define __ARM_NEON_SVE_BRIDGE_H #include #include #ifdef __cplusplus extern "C" { #endif /* Function attributes */ #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #define __aio \ static __inline__ \ __attribute__((__always_inline__, __nodebug__, __overloadable__)) __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8))) svint8_t svset_neonq(svint8_t, int8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16))) svint16_t svset_neonq(svint16_t, int16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32))) svint32_t svset_neonq(svint32_t, int32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64))) svint64_t svset_neonq(svint64_t, int64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8))) svuint8_t svset_neonq(svuint8_t, uint8x16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16))) svuint16_t svset_neonq(svuint16_t, uint16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32))) svuint32_t svset_neonq(svuint32_t, uint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64))) svuint64_t svset_neonq(svuint64_t, uint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16))) svfloat16_t svset_neonq(svfloat16_t, float16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32))) svfloat32_t svset_neonq(svfloat32_t, float32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64))) svfloat64_t svset_neonq(svfloat64_t, float64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8))) svint8_t svset_neonq_s8(svint8_t, int8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16))) svint16_t svset_neonq_s16(svint16_t, int16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32))) svint32_t svset_neonq_s32(svint32_t, int32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64))) svint64_t svset_neonq_s64(svint64_t, int64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8))) svuint8_t svset_neonq_u8(svuint8_t, uint8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16))) svuint16_t svset_neonq_u16(svuint16_t, uint16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32))) svuint32_t svset_neonq_u32(svuint32_t, uint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64))) svuint64_t svset_neonq_u64(svuint64_t, uint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16))) svfloat16_t svset_neonq_f16(svfloat16_t, float16x8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32))) svfloat32_t svset_neonq_f32(svfloat32_t, float32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64))) svfloat64_t svset_neonq_f64(svfloat64_t, float64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8))) int8x16_t svget_neonq(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16))) int16x8_t svget_neonq(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32))) int32x4_t svget_neonq(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64))) int64x2_t svget_neonq(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8))) uint8x16_t svget_neonq(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16))) uint16x8_t svget_neonq(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32))) uint32x4_t svget_neonq(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64))) uint64x2_t svget_neonq(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16))) float16x8_t svget_neonq(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32))) float32x4_t svget_neonq(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64))) float64x2_t svget_neonq(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8))) int8x16_t svget_neonq_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16))) int16x8_t svget_neonq_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32))) int32x4_t svget_neonq_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64))) int64x2_t svget_neonq_s64(svint64_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8))) uint8x16_t svget_neonq_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16))) uint16x8_t svget_neonq_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32))) uint32x4_t svget_neonq_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64))) uint64x2_t svget_neonq_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16))) float16x8_t svget_neonq_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32))) float32x4_t svget_neonq_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64))) float64x2_t svget_neonq_f64(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8))) svint8_t svdup_neonq(int8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16))) svint16_t svdup_neonq(int16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32))) svint32_t svdup_neonq(int32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64))) svint64_t svdup_neonq(int64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8))) svuint8_t svdup_neonq(uint8x16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16))) svuint16_t svdup_neonq(uint16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32))) svuint32_t svdup_neonq(uint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64))) svuint64_t svdup_neonq(uint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16))) svfloat16_t svdup_neonq(float16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32))) svfloat32_t svdup_neonq(float32x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64))) svfloat64_t svdup_neonq(float64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8))) svint8_t svdup_neonq_s8(int8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16))) svint16_t svdup_neonq_s16(int16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32))) svint32_t svdup_neonq_s32(int32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64))) svint64_t svdup_neonq_s64(int64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8))) svuint8_t svdup_neonq_u8(uint8x16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16))) svuint16_t svdup_neonq_u16(uint16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32))) svuint32_t svdup_neonq_u32(uint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64))) svuint64_t svdup_neonq_u64(uint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16))) svfloat16_t svdup_neonq_f16(float16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32))) svfloat32_t svdup_neonq_f32(float32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64))) svfloat64_t svdup_neonq_f64(float64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16))) svbfloat16_t svset_neonq(svbfloat16_t, bfloat16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16))) svbfloat16_t svset_neonq_bf16(svbfloat16_t, bfloat16x8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16))) bfloat16x8_t svget_neonq(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16))) bfloat16x8_t svget_neonq_bf16(svbfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16))) svbfloat16_t svdup_neonq(bfloat16x8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16))) svbfloat16_t svdup_neonq_bf16(bfloat16x8_t); #undef __ai #undef __aio #ifdef __cplusplus } // extern "C" #endif #endif //__ARM_NEON_SVE_BRIDGE_H /builtins/arm_sme_draft_spec_subject_to_change.h/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics ------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_SME_H #define __ARM_SME_H #if !defined(__LITTLE_ENDIAN__) #error "Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h" #endif #include /* Function attributes */ #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__)) #ifdef __cplusplus extern "C" { #endif __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za)) void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za)) void svaddha_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za)) void svaddva_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za)) void svaddva_za32_s32_m(uint64_t, svbool_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsb), arm_streaming_compatible, 
arm_preserves_za)) uint64_t svcntsb(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsd), arm_streaming_compatible, arm_preserves_za)) uint64_t svcntsd(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsh), arm_streaming_compatible, arm_preserves_za)) uint64_t svcntsh(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svcntsw), arm_streaming_compatible, arm_preserves_za)) uint64_t svcntsw(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za128), arm_streaming, arm_shared_za)) void svld1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za16), arm_streaming, arm_shared_za)) void svld1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za32), arm_streaming, arm_shared_za)) void svld1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za64), arm_streaming, arm_shared_za)) void svld1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_vnum_za8), arm_streaming, arm_shared_za)) void svld1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za128), arm_streaming, arm_shared_za)) void svld1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za16), arm_streaming, arm_shared_za)) void svld1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za32), arm_streaming, arm_shared_za)) void svld1_hor_za32(uint64_t, uint32_t, uint64_t, 
svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za64), arm_streaming, arm_shared_za)) void svld1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_hor_za8), arm_streaming, arm_shared_za)) void svld1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za128), arm_streaming, arm_shared_za)) void svld1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za16), arm_streaming, arm_shared_za)) void svld1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za32), arm_streaming, arm_shared_za)) void svld1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za64), arm_streaming, arm_shared_za)) void svld1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_vnum_za8), arm_streaming, arm_shared_za)) void svld1_ver_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za128), arm_streaming, arm_shared_za)) void svld1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za16), arm_streaming, arm_shared_za)) void svld1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za32), arm_streaming, arm_shared_za)) void svld1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za64), arm_streaming, arm_shared_za)) void svld1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svld1_ver_za8), arm_streaming, arm_shared_za)) void svld1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za)) void svmopa_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za)) void svmopa_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za)) void svmopa_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za)) void svmopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za)) void svmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za)) void svmops_za32_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za)) void svmops_za32_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za)) void svmops_za32_f32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za)) void 
svmops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za)) void svmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_hor_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_hor_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_hor_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_hor_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_hor_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_hor_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_hor_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_hor_za128_f32_m(svfloat32_t, svbool_t, uint64_t, 
uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_hor_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_hor_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_hor_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_hor_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_hor_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_hor_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_hor_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_hor_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_hor_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_hor_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_hor_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_ver_za128_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_ver_za128_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_ver_za128_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_ver_za128_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_ver_za128_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_ver_za128_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_ver_za128_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_ver_za128_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_ver_za16_u16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_ver_za16_bf16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_ver_za16_f16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_ver_za16_s16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_ver_za32_u32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_ver_za32_f32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_ver_za32_s32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_ver_za64_u64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_ver_za64_f64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_ver_za64_s64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_vnum_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za128), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za16), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za32), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za64), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_za8), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_hor_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za128), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_vnum_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za16), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_vnum_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za32), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_vnum_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za64), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_vnum_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_vnum_za8), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_vnum_za8(uint64_t, 
uint32_t, uint64_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za128), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_za128(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za16), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_za16(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za32), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_za32(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za64), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_za64(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_ver_za8), arm_streaming, arm_shared_za, arm_preserves_za)) void svst1_ver_za8(uint64_t, uint32_t, uint64_t, svbool_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za)) void svsumopa_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za)) void svsumops_za32_s8_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za)) void svusmopa_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za)) void svusmops_za32_u8_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_u16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_bf16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_f16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_s16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za32_u32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za32_f32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za32_s32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za64_u64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za64_f64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za64_s64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za8_u8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za8_s8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za), arm_streaming_compatible, arm_shared_za)) void svzero_mask_za(uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za), arm_streaming_compatible, arm_shared_za)) void svzero_za(); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m), arm_streaming, arm_shared_za)) void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m), arm_streaming, arm_shared_za)) void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_u32_m), arm_streaming, arm_shared_za)) void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za32_s32_m), arm_streaming, arm_shared_za)) void svaddva_za32_m(uint64_t, svbool_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f16_m), arm_streaming, arm_shared_za)) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_bf16_m), arm_streaming, arm_shared_za)) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_f32_m), arm_streaming, arm_shared_za)) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_s8_m), arm_streaming, arm_shared_za)) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_u8_m), arm_streaming, arm_shared_za)) void svmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f16_m), arm_streaming, arm_shared_za)) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_bf16_m), arm_streaming, arm_shared_za)) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_f32_m), arm_streaming, arm_shared_za)) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_s8_m), arm_streaming, arm_shared_za)) void svmops_za32_m(uint64_t, svbool_t, 
svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za32_u8_m), arm_streaming, arm_shared_za)) void svmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_hor_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_hor_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_hor_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_hor_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_hor_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_hor_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_hor_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_hor_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_hor_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_hor_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_hor_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_hor_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_hor_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_hor_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_hor_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_hor_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_hor_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_hor_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_hor_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_hor_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_ver_za128_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_ver_za128_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_ver_za128_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_ver_za128_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_ver_za128_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_ver_za128_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_ver_za128_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_ver_za128_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint16_t svread_ver_za16_m(svuint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svbfloat16_t svread_ver_za16_m(svbfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_f16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat16_t svread_ver_za16_m(svfloat16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_s16_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint16_t svread_ver_za16_m(svint16_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_u32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint32_t svread_ver_za32_m(svuint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_f32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat32_t svread_ver_za32_m(svfloat32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za32_s32_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint32_t svread_ver_za32_m(svint32_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_u64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint64_t svread_ver_za64_m(svuint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_f64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svfloat64_t svread_ver_za64_m(svfloat64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za64_s64_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint64_t svread_ver_za64_m(svint64_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m), arm_streaming, arm_shared_za, arm_preserves_za)) svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m), arm_streaming, arm_shared_za)) void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m), arm_streaming, arm_shared_za)) void svsumops_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za32_u8_m), arm_streaming, arm_shared_za)) void svusmopa_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za32_u8_m), arm_streaming, arm_shared_za)) void svusmops_za32_m(uint64_t, svbool_t, svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_u16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_bf16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_f16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_bf16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_f16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_s16_m), arm_streaming, arm_shared_za)) void svwrite_hor_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_u32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_f32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za32_s32_m), arm_streaming, arm_shared_za)) void svwrite_hor_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_u64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_f64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za64_s64_m), arm_streaming, arm_shared_za)) void svwrite_hor_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m), arm_streaming, arm_shared_za)) void svwrite_hor_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_bf16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_f16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za128_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_f16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_s16_m), arm_streaming, arm_shared_za)) void svwrite_ver_za16_m(uint64_t, uint32_t, uint64_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_u32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_f32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za32_s32_m), arm_streaming, arm_shared_za)) void svwrite_ver_za32_m(uint64_t, uint32_t, uint64_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_u64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_f64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za64_s64_m), arm_streaming, arm_shared_za)) void svwrite_ver_za64_m(uint64_t, uint32_t, uint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m), arm_streaming, arm_shared_za)) void svwrite_ver_za8_m(uint64_t, uint32_t, uint64_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za)) void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za)) void svmops_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m), arm_streaming, arm_shared_za)) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m), arm_streaming, arm_shared_za)) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), 
arm_streaming, arm_shared_za)) void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za)) void svaddha_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za)) void svaddva_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za)) void svaddva_za64_s64_m(uint64_t, svbool_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za)) void svmopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za)) void svmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za)) void svmops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za)) void svmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za)) void svsumopa_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za)) void svsumops_za64_s16_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za)) void svusmopa_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za)) void svusmops_za64_u16_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m), arm_streaming, arm_shared_za)) void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m), arm_streaming, arm_shared_za)) void svaddha_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_u64_m), arm_streaming, arm_shared_za)) void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddva_za64_s64_m), arm_streaming, arm_shared_za)) void svaddva_za64_m(uint64_t, svbool_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_s16_m), arm_streaming, arm_shared_za)) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_u16_m), arm_streaming, arm_shared_za)) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_s16_m), arm_streaming, arm_shared_za)) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_u16_m), arm_streaming, arm_shared_za)) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za64_s16_m), arm_streaming, arm_shared_za)) void svsumopa_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za64_s16_m), arm_streaming, arm_shared_za)) void svsumops_za64_m(uint64_t, svbool_t, svbool_t, svint16_t, 
svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m), arm_streaming, arm_shared_za)) void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m), arm_streaming, arm_shared_za)) void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svldr_vnum_za), arm_streaming_compatible, arm_shared_za)) void svldr_vnum_za(uint32_t, uint64_t, void const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_vnum_za), arm_streaming_compatible, arm_shared_za, arm_preserves_za)) void svstr_vnum_za(uint32_t, uint64_t, void *); #ifdef __cplusplus } // extern "C" #endif #undef __ai #endif /* __ARM_SME_H */ /*===---- arm_sve.h - ARM SVE intrinsics -----------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ARM_SVE_H #define __ARM_SVE_H #if !defined(__LITTLE_ENDIAN__) #error "Big endian is currently not supported for arm_sve.h" #endif #include #ifdef __cplusplus extern "C" { #else #include #endif typedef __fp16 float16_t; typedef float float32_t; typedef double float64_t; typedef __SVInt8_t svint8_t; typedef __SVInt16_t svint16_t; typedef __SVInt32_t svint32_t; typedef __SVInt64_t svint64_t; typedef __SVUint8_t svuint8_t; typedef __SVUint16_t svuint16_t; typedef __SVUint32_t svuint32_t; typedef __SVUint64_t svuint64_t; typedef __SVFloat16_t svfloat16_t; typedef __SVBFloat16_t svbfloat16_t; #include typedef __SVFloat32_t svfloat32_t; typedef __SVFloat64_t svfloat64_t; typedef __clang_svint8x2_t svint8x2_t; typedef __clang_svint16x2_t svint16x2_t; typedef __clang_svint32x2_t svint32x2_t; typedef __clang_svint64x2_t svint64x2_t; typedef __clang_svuint8x2_t svuint8x2_t; typedef __clang_svuint16x2_t svuint16x2_t; typedef __clang_svuint32x2_t svuint32x2_t; typedef __clang_svuint64x2_t svuint64x2_t; typedef __clang_svfloat16x2_t svfloat16x2_t; typedef __clang_svfloat32x2_t svfloat32x2_t; typedef __clang_svfloat64x2_t svfloat64x2_t; typedef __clang_svint8x3_t svint8x3_t; typedef __clang_svint16x3_t svint16x3_t; typedef __clang_svint32x3_t svint32x3_t; typedef __clang_svint64x3_t svint64x3_t; typedef __clang_svuint8x3_t svuint8x3_t; typedef __clang_svuint16x3_t svuint16x3_t; typedef __clang_svuint32x3_t svuint32x3_t; typedef __clang_svuint64x3_t svuint64x3_t; typedef __clang_svfloat16x3_t svfloat16x3_t; typedef __clang_svfloat32x3_t svfloat32x3_t; typedef __clang_svfloat64x3_t svfloat64x3_t; typedef __clang_svint8x4_t svint8x4_t; typedef __clang_svint16x4_t svint16x4_t; typedef __clang_svint32x4_t svint32x4_t; typedef __clang_svint64x4_t svint64x4_t; typedef __clang_svuint8x4_t svuint8x4_t; typedef __clang_svuint16x4_t 
svuint16x4_t; typedef __clang_svuint32x4_t svuint32x4_t; typedef __clang_svuint64x4_t svuint64x4_t; typedef __clang_svfloat16x4_t svfloat16x4_t; typedef __clang_svfloat32x4_t svfloat32x4_t; typedef __clang_svfloat64x4_t svfloat64x4_t; typedef __SVBool_t svbool_t; typedef __clang_svboolx2_t svboolx2_t; typedef __clang_svboolx4_t svboolx4_t; typedef __clang_svbfloat16x2_t svbfloat16x2_t; typedef __clang_svbfloat16x3_t svbfloat16x3_t; typedef __clang_svbfloat16x4_t svbfloat16x4_t; typedef __SVCount_t svcount_t; enum svpattern { SV_POW2 = 0, SV_VL1 = 1, SV_VL2 = 2, SV_VL3 = 3, SV_VL4 = 4, SV_VL5 = 5, SV_VL6 = 6, SV_VL7 = 7, SV_VL8 = 8, SV_VL16 = 9, SV_VL32 = 10, SV_VL64 = 11, SV_VL128 = 12, SV_VL256 = 13, SV_MUL4 = 29, SV_MUL3 = 30, SV_ALL = 31 }; enum svprfop { SV_PLDL1KEEP = 0, SV_PLDL1STRM = 1, SV_PLDL2KEEP = 2, SV_PLDL2STRM = 3, SV_PLDL3KEEP = 4, SV_PLDL3STRM = 5, SV_PSTL1KEEP = 8, SV_PSTL1STRM = 9, SV_PSTL2KEEP = 10, SV_PSTL2STRM = 11, SV_PSTL3KEEP = 12, SV_PSTL3STRM = 13 }; /* Function attributes */ #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) #define __aio static __inline__ __attribute__((__always_inline__, __nodebug__, __overloadable__)) #define svreinterpret_s8_s8(...) __builtin_sve_reinterpret_s8_s8(__VA_ARGS__) #define svreinterpret_s8_s16(...) __builtin_sve_reinterpret_s8_s16(__VA_ARGS__) #define svreinterpret_s8_s32(...) __builtin_sve_reinterpret_s8_s32(__VA_ARGS__) #define svreinterpret_s8_s64(...) __builtin_sve_reinterpret_s8_s64(__VA_ARGS__) #define svreinterpret_s8_u8(...) __builtin_sve_reinterpret_s8_u8(__VA_ARGS__) #define svreinterpret_s8_u16(...) __builtin_sve_reinterpret_s8_u16(__VA_ARGS__) #define svreinterpret_s8_u32(...) __builtin_sve_reinterpret_s8_u32(__VA_ARGS__) #define svreinterpret_s8_u64(...) __builtin_sve_reinterpret_s8_u64(__VA_ARGS__) #define svreinterpret_s8_f16(...) __builtin_sve_reinterpret_s8_f16(__VA_ARGS__) #define svreinterpret_s8_bf16(...) 
__builtin_sve_reinterpret_s8_bf16(__VA_ARGS__) #define svreinterpret_s8_f32(...) __builtin_sve_reinterpret_s8_f32(__VA_ARGS__) #define svreinterpret_s8_f64(...) __builtin_sve_reinterpret_s8_f64(__VA_ARGS__) #define svreinterpret_s16_s8(...) __builtin_sve_reinterpret_s16_s8(__VA_ARGS__) #define svreinterpret_s16_s16(...) __builtin_sve_reinterpret_s16_s16(__VA_ARGS__) #define svreinterpret_s16_s32(...) __builtin_sve_reinterpret_s16_s32(__VA_ARGS__) #define svreinterpret_s16_s64(...) __builtin_sve_reinterpret_s16_s64(__VA_ARGS__) #define svreinterpret_s16_u8(...) __builtin_sve_reinterpret_s16_u8(__VA_ARGS__) #define svreinterpret_s16_u16(...) __builtin_sve_reinterpret_s16_u16(__VA_ARGS__) #define svreinterpret_s16_u32(...) __builtin_sve_reinterpret_s16_u32(__VA_ARGS__) #define svreinterpret_s16_u64(...) __builtin_sve_reinterpret_s16_u64(__VA_ARGS__) #define svreinterpret_s16_f16(...) __builtin_sve_reinterpret_s16_f16(__VA_ARGS__) #define svreinterpret_s16_bf16(...) __builtin_sve_reinterpret_s16_bf16(__VA_ARGS__) #define svreinterpret_s16_f32(...) __builtin_sve_reinterpret_s16_f32(__VA_ARGS__) #define svreinterpret_s16_f64(...) __builtin_sve_reinterpret_s16_f64(__VA_ARGS__) #define svreinterpret_s32_s8(...) __builtin_sve_reinterpret_s32_s8(__VA_ARGS__) #define svreinterpret_s32_s16(...) __builtin_sve_reinterpret_s32_s16(__VA_ARGS__) #define svreinterpret_s32_s32(...) __builtin_sve_reinterpret_s32_s32(__VA_ARGS__) #define svreinterpret_s32_s64(...) __builtin_sve_reinterpret_s32_s64(__VA_ARGS__) #define svreinterpret_s32_u8(...) __builtin_sve_reinterpret_s32_u8(__VA_ARGS__) #define svreinterpret_s32_u16(...) __builtin_sve_reinterpret_s32_u16(__VA_ARGS__) #define svreinterpret_s32_u32(...) __builtin_sve_reinterpret_s32_u32(__VA_ARGS__) #define svreinterpret_s32_u64(...) __builtin_sve_reinterpret_s32_u64(__VA_ARGS__) #define svreinterpret_s32_f16(...) __builtin_sve_reinterpret_s32_f16(__VA_ARGS__) #define svreinterpret_s32_bf16(...) 
__builtin_sve_reinterpret_s32_bf16(__VA_ARGS__) #define svreinterpret_s32_f32(...) __builtin_sve_reinterpret_s32_f32(__VA_ARGS__) #define svreinterpret_s32_f64(...) __builtin_sve_reinterpret_s32_f64(__VA_ARGS__) #define svreinterpret_s64_s8(...) __builtin_sve_reinterpret_s64_s8(__VA_ARGS__) #define svreinterpret_s64_s16(...) __builtin_sve_reinterpret_s64_s16(__VA_ARGS__) #define svreinterpret_s64_s32(...) __builtin_sve_reinterpret_s64_s32(__VA_ARGS__) #define svreinterpret_s64_s64(...) __builtin_sve_reinterpret_s64_s64(__VA_ARGS__) #define svreinterpret_s64_u8(...) __builtin_sve_reinterpret_s64_u8(__VA_ARGS__) #define svreinterpret_s64_u16(...) __builtin_sve_reinterpret_s64_u16(__VA_ARGS__) #define svreinterpret_s64_u32(...) __builtin_sve_reinterpret_s64_u32(__VA_ARGS__) #define svreinterpret_s64_u64(...) __builtin_sve_reinterpret_s64_u64(__VA_ARGS__) #define svreinterpret_s64_f16(...) __builtin_sve_reinterpret_s64_f16(__VA_ARGS__) #define svreinterpret_s64_bf16(...) __builtin_sve_reinterpret_s64_bf16(__VA_ARGS__) #define svreinterpret_s64_f32(...) __builtin_sve_reinterpret_s64_f32(__VA_ARGS__) #define svreinterpret_s64_f64(...) __builtin_sve_reinterpret_s64_f64(__VA_ARGS__) #define svreinterpret_u8_s8(...) __builtin_sve_reinterpret_u8_s8(__VA_ARGS__) #define svreinterpret_u8_s16(...) __builtin_sve_reinterpret_u8_s16(__VA_ARGS__) #define svreinterpret_u8_s32(...) __builtin_sve_reinterpret_u8_s32(__VA_ARGS__) #define svreinterpret_u8_s64(...) __builtin_sve_reinterpret_u8_s64(__VA_ARGS__) #define svreinterpret_u8_u8(...) __builtin_sve_reinterpret_u8_u8(__VA_ARGS__) #define svreinterpret_u8_u16(...) __builtin_sve_reinterpret_u8_u16(__VA_ARGS__) #define svreinterpret_u8_u32(...) __builtin_sve_reinterpret_u8_u32(__VA_ARGS__) #define svreinterpret_u8_u64(...) __builtin_sve_reinterpret_u8_u64(__VA_ARGS__) #define svreinterpret_u8_f16(...) __builtin_sve_reinterpret_u8_f16(__VA_ARGS__) #define svreinterpret_u8_bf16(...) 
__builtin_sve_reinterpret_u8_bf16(__VA_ARGS__) #define svreinterpret_u8_f32(...) __builtin_sve_reinterpret_u8_f32(__VA_ARGS__) #define svreinterpret_u8_f64(...) __builtin_sve_reinterpret_u8_f64(__VA_ARGS__) #define svreinterpret_u16_s8(...) __builtin_sve_reinterpret_u16_s8(__VA_ARGS__) #define svreinterpret_u16_s16(...) __builtin_sve_reinterpret_u16_s16(__VA_ARGS__) #define svreinterpret_u16_s32(...) __builtin_sve_reinterpret_u16_s32(__VA_ARGS__) #define svreinterpret_u16_s64(...) __builtin_sve_reinterpret_u16_s64(__VA_ARGS__) #define svreinterpret_u16_u8(...) __builtin_sve_reinterpret_u16_u8(__VA_ARGS__) #define svreinterpret_u16_u16(...) __builtin_sve_reinterpret_u16_u16(__VA_ARGS__) #define svreinterpret_u16_u32(...) __builtin_sve_reinterpret_u16_u32(__VA_ARGS__) #define svreinterpret_u16_u64(...) __builtin_sve_reinterpret_u16_u64(__VA_ARGS__) #define svreinterpret_u16_f16(...) __builtin_sve_reinterpret_u16_f16(__VA_ARGS__) #define svreinterpret_u16_bf16(...) __builtin_sve_reinterpret_u16_bf16(__VA_ARGS__) #define svreinterpret_u16_f32(...) __builtin_sve_reinterpret_u16_f32(__VA_ARGS__) #define svreinterpret_u16_f64(...) __builtin_sve_reinterpret_u16_f64(__VA_ARGS__) #define svreinterpret_u32_s8(...) __builtin_sve_reinterpret_u32_s8(__VA_ARGS__) #define svreinterpret_u32_s16(...) __builtin_sve_reinterpret_u32_s16(__VA_ARGS__) #define svreinterpret_u32_s32(...) __builtin_sve_reinterpret_u32_s32(__VA_ARGS__) #define svreinterpret_u32_s64(...) __builtin_sve_reinterpret_u32_s64(__VA_ARGS__) #define svreinterpret_u32_u8(...) __builtin_sve_reinterpret_u32_u8(__VA_ARGS__) #define svreinterpret_u32_u16(...) __builtin_sve_reinterpret_u32_u16(__VA_ARGS__) #define svreinterpret_u32_u32(...) __builtin_sve_reinterpret_u32_u32(__VA_ARGS__) #define svreinterpret_u32_u64(...) __builtin_sve_reinterpret_u32_u64(__VA_ARGS__) #define svreinterpret_u32_f16(...) __builtin_sve_reinterpret_u32_f16(__VA_ARGS__) #define svreinterpret_u32_bf16(...) 
__builtin_sve_reinterpret_u32_bf16(__VA_ARGS__) #define svreinterpret_u32_f32(...) __builtin_sve_reinterpret_u32_f32(__VA_ARGS__) #define svreinterpret_u32_f64(...) __builtin_sve_reinterpret_u32_f64(__VA_ARGS__) #define svreinterpret_u64_s8(...) __builtin_sve_reinterpret_u64_s8(__VA_ARGS__) #define svreinterpret_u64_s16(...) __builtin_sve_reinterpret_u64_s16(__VA_ARGS__) #define svreinterpret_u64_s32(...) __builtin_sve_reinterpret_u64_s32(__VA_ARGS__) #define svreinterpret_u64_s64(...) __builtin_sve_reinterpret_u64_s64(__VA_ARGS__) #define svreinterpret_u64_u8(...) __builtin_sve_reinterpret_u64_u8(__VA_ARGS__) #define svreinterpret_u64_u16(...) __builtin_sve_reinterpret_u64_u16(__VA_ARGS__) #define svreinterpret_u64_u32(...) __builtin_sve_reinterpret_u64_u32(__VA_ARGS__) #define svreinterpret_u64_u64(...) __builtin_sve_reinterpret_u64_u64(__VA_ARGS__) #define svreinterpret_u64_f16(...) __builtin_sve_reinterpret_u64_f16(__VA_ARGS__) #define svreinterpret_u64_bf16(...) __builtin_sve_reinterpret_u64_bf16(__VA_ARGS__) #define svreinterpret_u64_f32(...) __builtin_sve_reinterpret_u64_f32(__VA_ARGS__) #define svreinterpret_u64_f64(...) __builtin_sve_reinterpret_u64_f64(__VA_ARGS__) #define svreinterpret_f16_s8(...) __builtin_sve_reinterpret_f16_s8(__VA_ARGS__) #define svreinterpret_f16_s16(...) __builtin_sve_reinterpret_f16_s16(__VA_ARGS__) #define svreinterpret_f16_s32(...) __builtin_sve_reinterpret_f16_s32(__VA_ARGS__) #define svreinterpret_f16_s64(...) __builtin_sve_reinterpret_f16_s64(__VA_ARGS__) #define svreinterpret_f16_u8(...) __builtin_sve_reinterpret_f16_u8(__VA_ARGS__) #define svreinterpret_f16_u16(...) __builtin_sve_reinterpret_f16_u16(__VA_ARGS__) #define svreinterpret_f16_u32(...) __builtin_sve_reinterpret_f16_u32(__VA_ARGS__) #define svreinterpret_f16_u64(...) __builtin_sve_reinterpret_f16_u64(__VA_ARGS__) #define svreinterpret_f16_f16(...) __builtin_sve_reinterpret_f16_f16(__VA_ARGS__) #define svreinterpret_f16_bf16(...) 
__builtin_sve_reinterpret_f16_bf16(__VA_ARGS__) #define svreinterpret_f16_f32(...) __builtin_sve_reinterpret_f16_f32(__VA_ARGS__) #define svreinterpret_f16_f64(...) __builtin_sve_reinterpret_f16_f64(__VA_ARGS__) #define svreinterpret_bf16_s8(...) __builtin_sve_reinterpret_bf16_s8(__VA_ARGS__) #define svreinterpret_bf16_s16(...) __builtin_sve_reinterpret_bf16_s16(__VA_ARGS__) #define svreinterpret_bf16_s32(...) __builtin_sve_reinterpret_bf16_s32(__VA_ARGS__) #define svreinterpret_bf16_s64(...) __builtin_sve_reinterpret_bf16_s64(__VA_ARGS__) #define svreinterpret_bf16_u8(...) __builtin_sve_reinterpret_bf16_u8(__VA_ARGS__) #define svreinterpret_bf16_u16(...) __builtin_sve_reinterpret_bf16_u16(__VA_ARGS__) #define svreinterpret_bf16_u32(...) __builtin_sve_reinterpret_bf16_u32(__VA_ARGS__) #define svreinterpret_bf16_u64(...) __builtin_sve_reinterpret_bf16_u64(__VA_ARGS__) #define svreinterpret_bf16_f16(...) __builtin_sve_reinterpret_bf16_f16(__VA_ARGS__) #define svreinterpret_bf16_bf16(...) __builtin_sve_reinterpret_bf16_bf16(__VA_ARGS__) #define svreinterpret_bf16_f32(...) __builtin_sve_reinterpret_bf16_f32(__VA_ARGS__) #define svreinterpret_bf16_f64(...) __builtin_sve_reinterpret_bf16_f64(__VA_ARGS__) #define svreinterpret_f32_s8(...) __builtin_sve_reinterpret_f32_s8(__VA_ARGS__) #define svreinterpret_f32_s16(...) __builtin_sve_reinterpret_f32_s16(__VA_ARGS__) #define svreinterpret_f32_s32(...) __builtin_sve_reinterpret_f32_s32(__VA_ARGS__) #define svreinterpret_f32_s64(...) __builtin_sve_reinterpret_f32_s64(__VA_ARGS__) #define svreinterpret_f32_u8(...) __builtin_sve_reinterpret_f32_u8(__VA_ARGS__) #define svreinterpret_f32_u16(...) __builtin_sve_reinterpret_f32_u16(__VA_ARGS__) #define svreinterpret_f32_u32(...) __builtin_sve_reinterpret_f32_u32(__VA_ARGS__) #define svreinterpret_f32_u64(...) __builtin_sve_reinterpret_f32_u64(__VA_ARGS__) #define svreinterpret_f32_f16(...) __builtin_sve_reinterpret_f32_f16(__VA_ARGS__) #define svreinterpret_f32_bf16(...) 
__builtin_sve_reinterpret_f32_bf16(__VA_ARGS__) #define svreinterpret_f32_f32(...) __builtin_sve_reinterpret_f32_f32(__VA_ARGS__) #define svreinterpret_f32_f64(...) __builtin_sve_reinterpret_f32_f64(__VA_ARGS__) #define svreinterpret_f64_s8(...) __builtin_sve_reinterpret_f64_s8(__VA_ARGS__) #define svreinterpret_f64_s16(...) __builtin_sve_reinterpret_f64_s16(__VA_ARGS__) #define svreinterpret_f64_s32(...) __builtin_sve_reinterpret_f64_s32(__VA_ARGS__) #define svreinterpret_f64_s64(...) __builtin_sve_reinterpret_f64_s64(__VA_ARGS__) #define svreinterpret_f64_u8(...) __builtin_sve_reinterpret_f64_u8(__VA_ARGS__) #define svreinterpret_f64_u16(...) __builtin_sve_reinterpret_f64_u16(__VA_ARGS__) #define svreinterpret_f64_u32(...) __builtin_sve_reinterpret_f64_u32(__VA_ARGS__) #define svreinterpret_f64_u64(...) __builtin_sve_reinterpret_f64_u64(__VA_ARGS__) #define svreinterpret_f64_f16(...) __builtin_sve_reinterpret_f64_f16(__VA_ARGS__) #define svreinterpret_f64_bf16(...) __builtin_sve_reinterpret_f64_bf16(__VA_ARGS__) #define svreinterpret_f64_f32(...) __builtin_sve_reinterpret_f64_f32(__VA_ARGS__) #define svreinterpret_f64_f64(...) 
__builtin_sve_reinterpret_f64_f64(__VA_ARGS__) __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint8_t op) { return __builtin_sve_reinterpret_s8_s8(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint16_t op) { return __builtin_sve_reinterpret_s8_s16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint32_t op) { return __builtin_sve_reinterpret_s8_s32(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svint64_t op) { return __builtin_sve_reinterpret_s8_s64(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint8_t op) { return __builtin_sve_reinterpret_s8_u8(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint16_t op) { return __builtin_sve_reinterpret_s8_u16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint32_t op) { return __builtin_sve_reinterpret_s8_u32(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svuint64_t op) { return __builtin_sve_reinterpret_s8_u64(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat16_t op) { return __builtin_sve_reinterpret_s8_f16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svbfloat16_t op) { return __builtin_sve_reinterpret_s8_bf16(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat32_t op) { return __builtin_sve_reinterpret_s8_f32(op); } __aio __attribute__((target("sve"))) svint8_t svreinterpret_s8(svfloat64_t op) { return __builtin_sve_reinterpret_s8_f64(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint8_t op) { return __builtin_sve_reinterpret_s16_s8(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint16_t op) { return __builtin_sve_reinterpret_s16_s16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svint32_t op) { return __builtin_sve_reinterpret_s16_s32(op); } __aio __attribute__((target("sve"))) 
svint16_t svreinterpret_s16(svint64_t op) { return __builtin_sve_reinterpret_s16_s64(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint8_t op) { return __builtin_sve_reinterpret_s16_u8(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint16_t op) { return __builtin_sve_reinterpret_s16_u16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint32_t op) { return __builtin_sve_reinterpret_s16_u32(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svuint64_t op) { return __builtin_sve_reinterpret_s16_u64(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat16_t op) { return __builtin_sve_reinterpret_s16_f16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svbfloat16_t op) { return __builtin_sve_reinterpret_s16_bf16(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat32_t op) { return __builtin_sve_reinterpret_s16_f32(op); } __aio __attribute__((target("sve"))) svint16_t svreinterpret_s16(svfloat64_t op) { return __builtin_sve_reinterpret_s16_f64(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint8_t op) { return __builtin_sve_reinterpret_s32_s8(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint16_t op) { return __builtin_sve_reinterpret_s32_s16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint32_t op) { return __builtin_sve_reinterpret_s32_s32(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svint64_t op) { return __builtin_sve_reinterpret_s32_s64(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint8_t op) { return __builtin_sve_reinterpret_s32_u8(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint16_t op) { return __builtin_sve_reinterpret_s32_u16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint32_t op) { return 
__builtin_sve_reinterpret_s32_u32(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svuint64_t op) { return __builtin_sve_reinterpret_s32_u64(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat16_t op) { return __builtin_sve_reinterpret_s32_f16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svbfloat16_t op) { return __builtin_sve_reinterpret_s32_bf16(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat32_t op) { return __builtin_sve_reinterpret_s32_f32(op); } __aio __attribute__((target("sve"))) svint32_t svreinterpret_s32(svfloat64_t op) { return __builtin_sve_reinterpret_s32_f64(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint8_t op) { return __builtin_sve_reinterpret_s64_s8(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint16_t op) { return __builtin_sve_reinterpret_s64_s16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint32_t op) { return __builtin_sve_reinterpret_s64_s32(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svint64_t op) { return __builtin_sve_reinterpret_s64_s64(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint8_t op) { return __builtin_sve_reinterpret_s64_u8(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint16_t op) { return __builtin_sve_reinterpret_s64_u16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint32_t op) { return __builtin_sve_reinterpret_s64_u32(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svuint64_t op) { return __builtin_sve_reinterpret_s64_u64(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat16_t op) { return __builtin_sve_reinterpret_s64_f16(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svbfloat16_t op) { return __builtin_sve_reinterpret_s64_bf16(op); } __aio 
__attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat32_t op) { return __builtin_sve_reinterpret_s64_f32(op); } __aio __attribute__((target("sve"))) svint64_t svreinterpret_s64(svfloat64_t op) { return __builtin_sve_reinterpret_s64_f64(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint8_t op) { return __builtin_sve_reinterpret_u8_s8(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint16_t op) { return __builtin_sve_reinterpret_u8_s16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint32_t op) { return __builtin_sve_reinterpret_u8_s32(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svint64_t op) { return __builtin_sve_reinterpret_u8_s64(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint8_t op) { return __builtin_sve_reinterpret_u8_u8(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint16_t op) { return __builtin_sve_reinterpret_u8_u16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint32_t op) { return __builtin_sve_reinterpret_u8_u32(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svuint64_t op) { return __builtin_sve_reinterpret_u8_u64(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat16_t op) { return __builtin_sve_reinterpret_u8_f16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svbfloat16_t op) { return __builtin_sve_reinterpret_u8_bf16(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat32_t op) { return __builtin_sve_reinterpret_u8_f32(op); } __aio __attribute__((target("sve"))) svuint8_t svreinterpret_u8(svfloat64_t op) { return __builtin_sve_reinterpret_u8_f64(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint8_t op) { return __builtin_sve_reinterpret_u16_s8(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint16_t op) { 
return __builtin_sve_reinterpret_u16_s16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint32_t op) { return __builtin_sve_reinterpret_u16_s32(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svint64_t op) { return __builtin_sve_reinterpret_u16_s64(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint8_t op) { return __builtin_sve_reinterpret_u16_u8(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint16_t op) { return __builtin_sve_reinterpret_u16_u16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint32_t op) { return __builtin_sve_reinterpret_u16_u32(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svuint64_t op) { return __builtin_sve_reinterpret_u16_u64(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat16_t op) { return __builtin_sve_reinterpret_u16_f16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svbfloat16_t op) { return __builtin_sve_reinterpret_u16_bf16(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat32_t op) { return __builtin_sve_reinterpret_u16_f32(op); } __aio __attribute__((target("sve"))) svuint16_t svreinterpret_u16(svfloat64_t op) { return __builtin_sve_reinterpret_u16_f64(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint8_t op) { return __builtin_sve_reinterpret_u32_s8(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint16_t op) { return __builtin_sve_reinterpret_u32_s16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint32_t op) { return __builtin_sve_reinterpret_u32_s32(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svint64_t op) { return __builtin_sve_reinterpret_u32_s64(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint8_t op) { return 
__builtin_sve_reinterpret_u32_u8(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint16_t op) { return __builtin_sve_reinterpret_u32_u16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint32_t op) { return __builtin_sve_reinterpret_u32_u32(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svuint64_t op) { return __builtin_sve_reinterpret_u32_u64(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat16_t op) { return __builtin_sve_reinterpret_u32_f16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svbfloat16_t op) { return __builtin_sve_reinterpret_u32_bf16(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat32_t op) { return __builtin_sve_reinterpret_u32_f32(op); } __aio __attribute__((target("sve"))) svuint32_t svreinterpret_u32(svfloat64_t op) { return __builtin_sve_reinterpret_u32_f64(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint8_t op) { return __builtin_sve_reinterpret_u64_s8(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint16_t op) { return __builtin_sve_reinterpret_u64_s16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint32_t op) { return __builtin_sve_reinterpret_u64_s32(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svint64_t op) { return __builtin_sve_reinterpret_u64_s64(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint8_t op) { return __builtin_sve_reinterpret_u64_u8(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint16_t op) { return __builtin_sve_reinterpret_u64_u16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint32_t op) { return __builtin_sve_reinterpret_u64_u32(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svuint64_t op) { return 
__builtin_sve_reinterpret_u64_u64(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat16_t op) { return __builtin_sve_reinterpret_u64_f16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svbfloat16_t op) { return __builtin_sve_reinterpret_u64_bf16(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat32_t op) { return __builtin_sve_reinterpret_u64_f32(op); } __aio __attribute__((target("sve"))) svuint64_t svreinterpret_u64(svfloat64_t op) { return __builtin_sve_reinterpret_u64_f64(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint8_t op) { return __builtin_sve_reinterpret_f16_s8(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint16_t op) { return __builtin_sve_reinterpret_f16_s16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint32_t op) { return __builtin_sve_reinterpret_f16_s32(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svint64_t op) { return __builtin_sve_reinterpret_f16_s64(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint8_t op) { return __builtin_sve_reinterpret_f16_u8(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint16_t op) { return __builtin_sve_reinterpret_f16_u16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint32_t op) { return __builtin_sve_reinterpret_f16_u32(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svuint64_t op) { return __builtin_sve_reinterpret_f16_u64(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat16_t op) { return __builtin_sve_reinterpret_f16_f16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svbfloat16_t op) { return __builtin_sve_reinterpret_f16_bf16(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat32_t op) { return 
__builtin_sve_reinterpret_f16_f32(op); } __aio __attribute__((target("sve"))) svfloat16_t svreinterpret_f16(svfloat64_t op) { return __builtin_sve_reinterpret_f16_f64(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint8_t op) { return __builtin_sve_reinterpret_bf16_s8(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint16_t op) { return __builtin_sve_reinterpret_bf16_s16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint32_t op) { return __builtin_sve_reinterpret_bf16_s32(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svint64_t op) { return __builtin_sve_reinterpret_bf16_s64(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint8_t op) { return __builtin_sve_reinterpret_bf16_u8(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint16_t op) { return __builtin_sve_reinterpret_bf16_u16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint32_t op) { return __builtin_sve_reinterpret_bf16_u32(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svuint64_t op) { return __builtin_sve_reinterpret_bf16_u64(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat16_t op) { return __builtin_sve_reinterpret_bf16_f16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svbfloat16_t op) { return __builtin_sve_reinterpret_bf16_bf16(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat32_t op) { return __builtin_sve_reinterpret_bf16_f32(op); } __aio __attribute__((target("sve"))) svbfloat16_t svreinterpret_bf16(svfloat64_t op) { return __builtin_sve_reinterpret_bf16_f64(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint8_t op) { return __builtin_sve_reinterpret_f32_s8(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint16_t 
op) { return __builtin_sve_reinterpret_f32_s16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint32_t op) { return __builtin_sve_reinterpret_f32_s32(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svint64_t op) { return __builtin_sve_reinterpret_f32_s64(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint8_t op) { return __builtin_sve_reinterpret_f32_u8(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint16_t op) { return __builtin_sve_reinterpret_f32_u16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint32_t op) { return __builtin_sve_reinterpret_f32_u32(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svuint64_t op) { return __builtin_sve_reinterpret_f32_u64(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat16_t op) { return __builtin_sve_reinterpret_f32_f16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svbfloat16_t op) { return __builtin_sve_reinterpret_f32_bf16(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat32_t op) { return __builtin_sve_reinterpret_f32_f32(op); } __aio __attribute__((target("sve"))) svfloat32_t svreinterpret_f32(svfloat64_t op) { return __builtin_sve_reinterpret_f32_f64(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint8_t op) { return __builtin_sve_reinterpret_f64_s8(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint16_t op) { return __builtin_sve_reinterpret_f64_s16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint32_t op) { return __builtin_sve_reinterpret_f64_s32(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svint64_t op) { return __builtin_sve_reinterpret_f64_s64(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint8_t op) { return 
__builtin_sve_reinterpret_f64_u8(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint16_t op) { return __builtin_sve_reinterpret_f64_u16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint32_t op) { return __builtin_sve_reinterpret_f64_u32(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svuint64_t op) { return __builtin_sve_reinterpret_f64_u64(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat16_t op) { return __builtin_sve_reinterpret_f64_f16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svbfloat16_t op) { return __builtin_sve_reinterpret_f64_bf16(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat32_t op) { return __builtin_sve_reinterpret_f64_f32(op); } __aio __attribute__((target("sve"))) svfloat64_t svreinterpret_f64(svfloat64_t op) { return __builtin_sve_reinterpret_f64_f64(op); } __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) svfloat64_t svabd_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m))) svfloat32_t svabd_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_m))) svfloat16_t svabd_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_x))) svfloat64_t svabd_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_x))) svfloat32_t svabd_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_x))) svfloat16_t svabd_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_z))) svfloat64_t svabd_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_z))) svfloat32_t svabd_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_z))) svfloat16_t svabd_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_m))) svint8_t svabd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_m))) svint32_t svabd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_m))) svint64_t svabd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_m))) svint16_t svabd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_x))) svint8_t svabd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_x))) svint32_t svabd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_x))) svint64_t svabd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_x))) svint16_t svabd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_z))) svint8_t svabd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_z))) svint32_t svabd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_z))) svint64_t svabd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_z))) svint16_t svabd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_m))) svuint8_t svabd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_m))) svuint32_t svabd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_m))) svuint64_t svabd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_m))) svuint16_t svabd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_x))) svuint8_t svabd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_x))) svuint32_t svabd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_x))) svuint64_t svabd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_x))) svuint16_t svabd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_z))) svuint8_t svabd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_z))) svuint32_t svabd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_z))) svuint64_t svabd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_z))) svuint16_t svabd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_m))) svfloat64_t svabd_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_m))) svfloat32_t svabd_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_m))) svfloat16_t svabd_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_x))) svfloat64_t svabd_f64_x(svbool_t, svfloat64_t, 
svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_x))) svfloat32_t svabd_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_x))) svfloat16_t svabd_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_z))) svfloat64_t svabd_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_z))) svfloat32_t svabd_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_z))) svfloat16_t svabd_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_m))) svint8_t svabd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_m))) svint32_t svabd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_m))) svint64_t svabd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_m))) svint16_t svabd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_x))) svint8_t svabd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_x))) svint32_t svabd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_x))) svint64_t svabd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_x))) svint16_t svabd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_z))) svint8_t svabd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_z))) svint32_t svabd_s32_z(svbool_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_z))) svint64_t svabd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_z))) svint16_t svabd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_m))) svuint8_t svabd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_m))) svuint32_t svabd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_m))) svuint64_t svabd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_m))) svuint16_t svabd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_x))) svuint8_t svabd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_x))) svuint32_t svabd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_x))) svuint64_t svabd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_x))) svuint16_t svabd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_z))) svuint8_t svabd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_z))) svuint32_t svabd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_z))) svuint64_t svabd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_z))) svuint16_t svabd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_m))) svfloat64_t svabs_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_m))) svfloat32_t svabs_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_m))) svfloat16_t svabs_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_x))) svfloat64_t svabs_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_x))) svfloat32_t svabs_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_x))) svfloat16_t svabs_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_z))) svfloat64_t svabs_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_z))) svfloat32_t svabs_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_z))) svfloat16_t svabs_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_m))) svint8_t svabs_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_m))) svint32_t svabs_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_m))) svint64_t svabs_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_m))) svint16_t svabs_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_x))) svint8_t svabs_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_x))) svint32_t svabs_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_x))) svint64_t svabs_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_x))) svint16_t svabs_s16_x(svbool_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_z))) svint8_t svabs_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_z))) svint32_t svabs_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_z))) svint64_t svabs_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_z))) svint16_t svabs_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f64))) svbool_t svacge_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f32))) svbool_t svacge_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f16))) svbool_t svacge_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f64))) svbool_t svacge_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f32))) svbool_t svacge_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f16))) svbool_t svacge_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f64))) svbool_t svacgt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f32))) svbool_t svacgt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f16))) svbool_t svacgt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f64))) svbool_t svacgt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f32))) svbool_t svacgt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f16))) 
svbool_t svacgt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f64))) svbool_t svacle_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f32))) svbool_t svacle_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f16))) svbool_t svacle_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f64))) svbool_t svacle_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f32))) svbool_t svacle_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f16))) svbool_t svacle_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f64))) svbool_t svaclt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f32))) svbool_t svaclt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f16))) svbool_t svaclt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f64))) svbool_t svaclt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f32))) svbool_t svaclt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f16))) svbool_t svaclt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_m))) svfloat64_t svadd_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_m))) svfloat32_t svadd_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_m))) 
svfloat16_t svadd_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_x))) svfloat64_t svadd_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_x))) svfloat32_t svadd_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_x))) svfloat16_t svadd_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_z))) svfloat64_t svadd_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_z))) svfloat32_t svadd_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_z))) svfloat16_t svadd_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_m))) svuint8_t svadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_m))) svuint32_t svadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_m))) svuint64_t svadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_m))) svuint16_t svadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_m))) svint8_t svadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_m))) svint32_t svadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_m))) svint64_t svadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_m))) svint16_t svadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_x))) svuint8_t svadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_x))) svuint32_t svadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_x))) svuint64_t svadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_x))) svuint16_t svadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_x))) svint8_t svadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_x))) svint32_t svadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_x))) svint64_t svadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_x))) svint16_t svadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_z))) svuint8_t svadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_z))) svuint32_t svadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_z))) svuint64_t svadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_z))) svuint16_t svadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_z))) svint8_t svadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_z))) svint32_t svadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_z))) svint64_t svadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_z))) svint16_t svadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_m))) svfloat64_t svadd_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_m))) svfloat32_t svadd_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_m))) svfloat16_t svadd_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_x))) svfloat64_t svadd_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_x))) svfloat32_t svadd_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_x))) svfloat16_t svadd_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_z))) svfloat64_t svadd_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_z))) svfloat32_t svadd_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_z))) svfloat16_t svadd_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_m))) svuint8_t svadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_m))) svuint32_t svadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_m))) svuint64_t svadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_m))) svuint16_t svadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_m))) svint8_t svadd_s8_m(svbool_t, svint8_t, 
svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_m))) svint32_t svadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_m))) svint64_t svadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_m))) svint16_t svadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_x))) svuint8_t svadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_x))) svuint32_t svadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_x))) svuint64_t svadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_x))) svuint16_t svadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_x))) svint8_t svadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_x))) svint32_t svadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_x))) svint64_t svadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_x))) svint16_t svadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_z))) svuint8_t svadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_z))) svuint32_t svadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_z))) svuint64_t svadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_z))) svuint16_t svadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_z))) svint8_t svadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_z))) svint32_t svadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_z))) svint64_t svadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_z))) svint16_t svadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) float64_t svadda_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) float32_t svadda_f32(svbool_t, float32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) float16_t svadda_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s8))) int64_t svaddv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s32))) int64_t svaddv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s64))) int64_t svaddv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s16))) int64_t svaddv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u8))) uint64_t svaddv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u32))) uint64_t svaddv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u64))) uint64_t svaddv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u16))) uint64_t svaddv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f64))) float64_t svaddv_f64(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f32))) float32_t svaddv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f16))) float16_t svaddv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_u32offset))) svuint32_t svadrb_u32base_u32offset(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_u64offset))) svuint64_t svadrb_u64base_u64offset(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_s32offset))) svuint32_t svadrb_u32base_s32offset(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_s64offset))) svuint64_t svadrb_u64base_s64offset(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_u32index))) svuint32_t svadrd_u32base_u32index(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_u64index))) svuint64_t svadrd_u64base_u64index(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_s32index))) svuint32_t svadrd_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_s64index))) svuint64_t svadrd_u64base_s64index(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_u32index))) svuint32_t svadrh_u32base_u32index(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_u64index))) svuint64_t svadrh_u64base_u64index(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_s32index))) svuint32_t svadrh_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_s64index))) svuint64_t svadrh_u64base_s64index(svuint64_t, svint64_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_u32index))) svuint32_t svadrw_u32base_u32index(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_u64index))) svuint64_t svadrw_u64base_u64index(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32index))) svuint32_t svadrw_u32base_s32index(svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index))) svuint64_t svadrw_u64base_s64index(svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_b_z))) svbool_t svand_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_m))) svuint8_t svand_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_m))) svuint32_t svand_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_m))) svuint64_t svand_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_m))) svuint16_t svand_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_m))) svint8_t svand_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_m))) svint32_t svand_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_m))) svint64_t svand_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_m))) svint16_t svand_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_x))) svuint8_t svand_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_x))) svuint32_t 
svand_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_x))) svuint64_t svand_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_x))) svuint16_t svand_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_x))) svint8_t svand_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_x))) svint32_t svand_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_x))) svint64_t svand_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_x))) svint16_t svand_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_z))) svuint8_t svand_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_z))) svuint32_t svand_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_z))) svuint64_t svand_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_z))) svuint16_t svand_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_z))) svint8_t svand_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_z))) svint32_t svand_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_z))) svint64_t svand_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_z))) svint16_t svand_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_m))) svuint8_t 
svand_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_m))) svuint32_t svand_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_m))) svuint64_t svand_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_m))) svuint16_t svand_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_m))) svint8_t svand_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_m))) svint32_t svand_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_m))) svint64_t svand_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_m))) svint16_t svand_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_x))) svuint8_t svand_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_x))) svuint32_t svand_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_x))) svuint64_t svand_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_x))) svuint16_t svand_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_x))) svint8_t svand_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_x))) svint32_t svand_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_x))) svint64_t svand_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_x))) svint16_t svand_s16_x(svbool_t, svint16_t, svint16_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_z))) svuint8_t svand_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_z))) svuint32_t svand_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_z))) svuint64_t svand_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_z))) svuint16_t svand_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_z))) svint8_t svand_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_z))) svint32_t svand_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_z))) svint64_t svand_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_z))) svint16_t svand_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u8))) uint8_t svandv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u32))) uint32_t svandv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u64))) uint64_t svandv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u16))) uint16_t svandv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s8))) int8_t svandv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s32))) int32_t svandv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s64))) int64_t svandv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s16))) int16_t svandv_s16(svbool_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_m))) svint8_t svasr_n_s8_m(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_m))) svint32_t svasr_n_s32_m(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_m))) svint64_t svasr_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_m))) svint16_t svasr_n_s16_m(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_x))) svint8_t svasr_n_s8_x(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_x))) svint32_t svasr_n_s32_x(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_x))) svint64_t svasr_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_x))) svint16_t svasr_n_s16_x(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_z))) svint8_t svasr_n_s8_z(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_z))) svint32_t svasr_n_s32_z(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_z))) svint64_t svasr_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_z))) svint16_t svasr_n_s16_z(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_m))) svint8_t svasr_s8_m(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_m))) svint32_t svasr_s32_m(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_m))) svint64_t svasr_s64_m(svbool_t, svint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_m))) svint16_t svasr_s16_m(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_x))) svint8_t svasr_s8_x(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_x))) svint32_t svasr_s32_x(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_x))) svint64_t svasr_s64_x(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_x))) svint16_t svasr_s16_x(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_z))) svint8_t svasr_s8_z(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_z))) svint32_t svasr_s32_z(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_z))) svint64_t svasr_s64_z(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_z))) svint16_t svasr_s16_z(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_m))) svint8_t svasr_wide_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_m))) svint32_t svasr_wide_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_m))) svint16_t svasr_wide_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_x))) svint8_t svasr_wide_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_x))) svint32_t svasr_wide_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_x))) svint16_t svasr_wide_n_s16_x(svbool_t, 
svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_z))) svint8_t svasr_wide_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_z))) svint32_t svasr_wide_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_z))) svint16_t svasr_wide_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_m))) svint8_t svasr_wide_s8_m(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_m))) svint32_t svasr_wide_s32_m(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_m))) svint16_t svasr_wide_s16_m(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_x))) svint8_t svasr_wide_s8_x(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_x))) svint32_t svasr_wide_s32_x(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_x))) svint16_t svasr_wide_s16_x(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_z))) svint8_t svasr_wide_s8_z(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_z))) svint32_t svasr_wide_s32_z(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_z))) svint16_t svasr_wide_s16_z(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_m))) svint8_t svasrd_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_m))) svint32_t svasrd_n_s32_m(svbool_t, svint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_m))) svint64_t svasrd_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_m))) svint16_t svasrd_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_x))) svint8_t svasrd_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_x))) svint32_t svasrd_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_x))) svint64_t svasrd_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_x))) svint16_t svasrd_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_z))) svint8_t svasrd_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_z))) svint32_t svasrd_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_z))) svint64_t svasrd_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_z))) svint16_t svasrd_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_b_z))) svbool_t svbic_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_m))) svuint8_t svbic_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_m))) svuint32_t svbic_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_m))) svuint64_t svbic_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_m))) svuint16_t svbic_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_m))) svint8_t svbic_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_m))) svint32_t svbic_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_m))) svint64_t svbic_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_m))) svint16_t svbic_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_x))) svuint8_t svbic_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_x))) svuint32_t svbic_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_x))) svuint64_t svbic_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_x))) svuint16_t svbic_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_x))) svint8_t svbic_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_x))) svint32_t svbic_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_x))) svint64_t svbic_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_x))) svint16_t svbic_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_z))) svuint8_t svbic_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_z))) svuint32_t svbic_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_z))) svuint64_t svbic_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_z))) svuint16_t svbic_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_z))) svint8_t svbic_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_z))) svint32_t svbic_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_z))) svint64_t svbic_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_z))) svint16_t svbic_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_m))) svuint8_t svbic_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_m))) svuint32_t svbic_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_m))) svuint64_t svbic_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_m))) svuint16_t svbic_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_m))) svint8_t svbic_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_m))) svint32_t svbic_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_m))) svint64_t svbic_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_m))) svint16_t svbic_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_x))) svuint8_t svbic_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_x))) svuint32_t svbic_u32_x(svbool_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_x))) svuint64_t svbic_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_x))) svuint16_t svbic_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_x))) svint8_t svbic_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_x))) svint32_t svbic_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_x))) svint64_t svbic_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_x))) svint16_t svbic_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_z))) svuint8_t svbic_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_z))) svuint32_t svbic_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_z))) svuint64_t svbic_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_z))) svuint16_t svbic_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_z))) svint8_t svbic_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_z))) svint32_t svbic_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_z))) svint64_t svbic_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_z))) svint16_t svbic_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_m))) svbool_t svbrka_b_m(svbool_t, svbool_t, svbool_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_z))) svbool_t svbrka_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_m))) svbool_t svbrkb_b_m(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_z))) svbool_t svbrkb_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkn_b_z))) svbool_t svbrkn_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpa_b_z))) svbool_t svbrkpa_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpb_b_z))) svbool_t svbrkpb_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_m))) svfloat64_t svcadd_f64_m(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_m))) svfloat32_t svcadd_f32_m(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_m))) svfloat16_t svcadd_f16_m(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_x))) svfloat64_t svcadd_f64_x(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_x))) svfloat32_t svcadd_f32_x(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_x))) svfloat16_t svcadd_f16_x(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_z))) svfloat64_t svcadd_f64_z(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_z))) svfloat32_t svcadd_f32_z(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_z))) 
svfloat16_t svcadd_f16_z(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u8))) uint8_t svclasta_n_u8(svbool_t, uint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u32))) uint32_t svclasta_n_u32(svbool_t, uint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u64))) uint64_t svclasta_n_u64(svbool_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u16))) uint16_t svclasta_n_u16(svbool_t, uint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s8))) int8_t svclasta_n_s8(svbool_t, int8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f64))) float64_t svclasta_n_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f32))) float32_t svclasta_n_f32(svbool_t, float32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f16))) float16_t svclasta_n_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s32))) int32_t svclasta_n_s32(svbool_t, int32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s64))) int64_t svclasta_n_s64(svbool_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s16))) int16_t svclasta_n_s16(svbool_t, int16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u8))) svuint8_t svclasta_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u32))) svuint32_t svclasta_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u64))) svuint64_t svclasta_u64(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u16))) svuint16_t svclasta_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s8))) svint8_t svclasta_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f64))) svfloat64_t svclasta_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f32))) svfloat32_t svclasta_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f16))) svfloat16_t svclasta_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s32))) svint32_t svclasta_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s64))) svint64_t svclasta_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s16))) svint16_t svclasta_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u8))) uint8_t svclastb_n_u8(svbool_t, uint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u32))) uint32_t svclastb_n_u32(svbool_t, uint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u64))) uint64_t svclastb_n_u64(svbool_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u16))) uint16_t svclastb_n_u16(svbool_t, uint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s8))) int8_t svclastb_n_s8(svbool_t, int8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f64))) float64_t svclastb_n_f64(svbool_t, float64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f32))) float32_t svclastb_n_f32(svbool_t, float32_t, 
svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f16))) float16_t svclastb_n_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s32))) int32_t svclastb_n_s32(svbool_t, int32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s64))) int64_t svclastb_n_s64(svbool_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s16))) int16_t svclastb_n_s16(svbool_t, int16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u8))) svuint8_t svclastb_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u32))) svuint32_t svclastb_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u64))) svuint64_t svclastb_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u16))) svuint16_t svclastb_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s8))) svint8_t svclastb_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f64))) svfloat64_t svclastb_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f32))) svfloat32_t svclastb_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f16))) svfloat16_t svclastb_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s32))) svint32_t svclastb_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s64))) svint64_t svclastb_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s16))) svint16_t svclastb_s16(svbool_t, 
svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_m))) svuint8_t svcls_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_m))) svuint32_t svcls_s32_m(svuint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_m))) svuint64_t svcls_s64_m(svuint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_m))) svuint16_t svcls_s16_m(svuint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_x))) svuint8_t svcls_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_x))) svuint32_t svcls_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_x))) svuint64_t svcls_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_x))) svuint16_t svcls_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_z))) svuint8_t svcls_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_z))) svuint32_t svcls_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_z))) svuint64_t svcls_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_z))) svuint16_t svcls_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_m))) svuint8_t svclz_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_m))) svuint32_t svclz_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_m))) svuint64_t svclz_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_m))) svuint16_t 
svclz_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_m))) svuint8_t svclz_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_m))) svuint32_t svclz_s32_m(svuint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_m))) svuint64_t svclz_s64_m(svuint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_m))) svuint16_t svclz_s16_m(svuint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_x))) svuint8_t svclz_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_x))) svuint32_t svclz_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_x))) svuint64_t svclz_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_x))) svuint16_t svclz_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_x))) svuint8_t svclz_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_x))) svuint32_t svclz_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_x))) svuint64_t svclz_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_x))) svuint16_t svclz_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_z))) svuint8_t svclz_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_z))) svuint32_t svclz_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_z))) svuint64_t svclz_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_z))) svuint16_t svclz_u16_z(svbool_t, 
svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_z))) svuint8_t svclz_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_z))) svuint32_t svclz_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_z))) svuint64_t svclz_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_z))) svuint16_t svclz_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_m))) svfloat64_t svcmla_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_m))) svfloat32_t svcmla_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_m))) svfloat16_t svcmla_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_x))) svfloat64_t svcmla_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_x))) svfloat32_t svcmla_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_x))) svfloat16_t svcmla_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_z))) svfloat64_t svcmla_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_z))) svfloat32_t svcmla_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_z))) svfloat16_t svcmla_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f32))) svfloat32_t svcmla_lane_f32(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f16))) svfloat16_t svcmla_lane_f16(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f64))) svbool_t svcmpeq_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f32))) svbool_t svcmpeq_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f16))) svbool_t svcmpeq_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u8))) svbool_t svcmpeq_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u32))) svbool_t svcmpeq_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u64))) svbool_t svcmpeq_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u16))) svbool_t svcmpeq_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s8))) svbool_t svcmpeq_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s32))) svbool_t svcmpeq_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s64))) svbool_t svcmpeq_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s16))) svbool_t svcmpeq_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u8))) svbool_t svcmpeq_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u32))) svbool_t 
svcmpeq_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u64))) svbool_t svcmpeq_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u16))) svbool_t svcmpeq_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s8))) svbool_t svcmpeq_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s32))) svbool_t svcmpeq_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s64))) svbool_t svcmpeq_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s16))) svbool_t svcmpeq_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f64))) svbool_t svcmpeq_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f32))) svbool_t svcmpeq_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f16))) svbool_t svcmpeq_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s8))) svbool_t svcmpeq_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s32))) svbool_t svcmpeq_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s16))) svbool_t svcmpeq_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s8))) svbool_t svcmpeq_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s32))) svbool_t svcmpeq_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s16))) 
svbool_t svcmpeq_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f64))) svbool_t svcmpge_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f32))) svbool_t svcmpge_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f16))) svbool_t svcmpge_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s8))) svbool_t svcmpge_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s32))) svbool_t svcmpge_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s64))) svbool_t svcmpge_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s16))) svbool_t svcmpge_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u8))) svbool_t svcmpge_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u32))) svbool_t svcmpge_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u64))) svbool_t svcmpge_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u16))) svbool_t svcmpge_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s8))) svbool_t svcmpge_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s32))) svbool_t svcmpge_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s64))) svbool_t svcmpge_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s16))) svbool_t 
svcmpge_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f64))) svbool_t svcmpge_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f32))) svbool_t svcmpge_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f16))) svbool_t svcmpge_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u8))) svbool_t svcmpge_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u32))) svbool_t svcmpge_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u64))) svbool_t svcmpge_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u16))) svbool_t svcmpge_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s8))) svbool_t svcmpge_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s32))) svbool_t svcmpge_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s16))) svbool_t svcmpge_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u8))) svbool_t svcmpge_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u32))) svbool_t svcmpge_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u16))) svbool_t svcmpge_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s8))) svbool_t svcmpge_wide_s8(svbool_t, svint8_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s32))) svbool_t svcmpge_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s16))) svbool_t svcmpge_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u8))) svbool_t svcmpge_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u32))) svbool_t svcmpge_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u16))) svbool_t svcmpge_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f64))) svbool_t svcmpgt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f32))) svbool_t svcmpgt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f16))) svbool_t svcmpgt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s8))) svbool_t svcmpgt_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s32))) svbool_t svcmpgt_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s64))) svbool_t svcmpgt_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s16))) svbool_t svcmpgt_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u8))) svbool_t svcmpgt_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u32))) svbool_t svcmpgt_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u64))) svbool_t svcmpgt_n_u64(svbool_t, 
svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u16))) svbool_t svcmpgt_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s8))) svbool_t svcmpgt_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s32))) svbool_t svcmpgt_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s64))) svbool_t svcmpgt_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s16))) svbool_t svcmpgt_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f64))) svbool_t svcmpgt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f32))) svbool_t svcmpgt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f16))) svbool_t svcmpgt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u8))) svbool_t svcmpgt_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u32))) svbool_t svcmpgt_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u64))) svbool_t svcmpgt_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u16))) svbool_t svcmpgt_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s8))) svbool_t svcmpgt_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s32))) svbool_t svcmpgt_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s16))) svbool_t svcmpgt_wide_n_s16(svbool_t, svint16_t, 
int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u8))) svbool_t svcmpgt_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u32))) svbool_t svcmpgt_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u16))) svbool_t svcmpgt_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s8))) svbool_t svcmpgt_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s32))) svbool_t svcmpgt_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s16))) svbool_t svcmpgt_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u8))) svbool_t svcmpgt_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u32))) svbool_t svcmpgt_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u16))) svbool_t svcmpgt_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f64))) svbool_t svcmple_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f32))) svbool_t svcmple_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f16))) svbool_t svcmple_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s8))) svbool_t svcmple_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s32))) svbool_t svcmple_n_s32(svbool_t, svint32_t, int32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s64))) svbool_t svcmple_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s16))) svbool_t svcmple_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u8))) svbool_t svcmple_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u32))) svbool_t svcmple_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u64))) svbool_t svcmple_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u16))) svbool_t svcmple_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s8))) svbool_t svcmple_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s32))) svbool_t svcmple_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s64))) svbool_t svcmple_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s16))) svbool_t svcmple_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f64))) svbool_t svcmple_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f32))) svbool_t svcmple_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f16))) svbool_t svcmple_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u8))) svbool_t svcmple_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u32))) svbool_t svcmple_u32(svbool_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u64))) svbool_t svcmple_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u16))) svbool_t svcmple_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s8))) svbool_t svcmple_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s32))) svbool_t svcmple_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s16))) svbool_t svcmple_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u8))) svbool_t svcmple_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u32))) svbool_t svcmple_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u16))) svbool_t svcmple_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s8))) svbool_t svcmple_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s32))) svbool_t svcmple_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s16))) svbool_t svcmple_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u8))) svbool_t svcmple_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u32))) svbool_t svcmple_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u16))) svbool_t svcmple_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u8))) svbool_t svcmplt_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u32))) svbool_t svcmplt_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u64))) svbool_t svcmplt_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u16))) svbool_t svcmplt_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f64))) svbool_t svcmplt_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f32))) svbool_t svcmplt_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f16))) svbool_t svcmplt_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s8))) svbool_t svcmplt_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s32))) svbool_t svcmplt_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s64))) svbool_t svcmplt_n_s64(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s16))) svbool_t svcmplt_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u8))) svbool_t svcmplt_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u32))) svbool_t svcmplt_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u64))) svbool_t svcmplt_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u16))) svbool_t svcmplt_u16(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s8))) svbool_t svcmplt_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s32))) svbool_t svcmplt_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s64))) svbool_t svcmplt_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s16))) svbool_t svcmplt_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f64))) svbool_t svcmplt_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f32))) svbool_t svcmplt_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f16))) svbool_t svcmplt_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u8))) svbool_t svcmplt_wide_n_u8(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u32))) svbool_t svcmplt_wide_n_u32(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u16))) svbool_t svcmplt_wide_n_u16(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s8))) svbool_t svcmplt_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s32))) svbool_t svcmplt_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s16))) svbool_t svcmplt_wide_n_s16(svbool_t, svint16_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u8))) svbool_t svcmplt_wide_u8(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u32))) svbool_t 
svcmplt_wide_u32(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u16))) svbool_t svcmplt_wide_u16(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s8))) svbool_t svcmplt_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s32))) svbool_t svcmplt_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s16))) svbool_t svcmplt_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f64))) svbool_t svcmpne_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f32))) svbool_t svcmpne_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f16))) svbool_t svcmpne_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u8))) svbool_t svcmpne_n_u8(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u32))) svbool_t svcmpne_n_u32(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u64))) svbool_t svcmpne_n_u64(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u16))) svbool_t svcmpne_n_u16(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s8))) svbool_t svcmpne_n_s8(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s32))) svbool_t svcmpne_n_s32(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s64))) svbool_t svcmpne_n_s64(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s16))) svbool_t svcmpne_n_s16(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u8))) svbool_t svcmpne_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u32))) svbool_t svcmpne_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u64))) svbool_t svcmpne_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u16))) svbool_t svcmpne_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s8))) svbool_t svcmpne_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s32))) svbool_t svcmpne_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s64))) svbool_t svcmpne_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s16))) svbool_t svcmpne_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f64))) svbool_t svcmpne_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f32))) svbool_t svcmpne_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f16))) svbool_t svcmpne_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s8))) svbool_t svcmpne_wide_n_s8(svbool_t, svint8_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s32))) svbool_t svcmpne_wide_n_s32(svbool_t, svint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s16))) svbool_t svcmpne_wide_n_s16(svbool_t, svint16_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s8))) svbool_t svcmpne_wide_s8(svbool_t, svint8_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s32))) svbool_t svcmpne_wide_s32(svbool_t, svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s16))) svbool_t svcmpne_wide_s16(svbool_t, svint16_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f64))) svbool_t svcmpuo_n_f64(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f32))) svbool_t svcmpuo_n_f32(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f16))) svbool_t svcmpuo_n_f16(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f64))) svbool_t svcmpuo_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f32))) svbool_t svcmpuo_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f16))) svbool_t svcmpuo_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_m))) svuint8_t svcnot_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_m))) svuint32_t svcnot_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_m))) svuint64_t svcnot_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_m))) svuint16_t svcnot_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_m))) svint8_t svcnot_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_m))) svint32_t svcnot_s32_m(svint32_t, svbool_t, 
svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_m))) svint64_t svcnot_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_m))) svint16_t svcnot_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_x))) svuint8_t svcnot_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_x))) svuint32_t svcnot_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_x))) svuint64_t svcnot_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_x))) svuint16_t svcnot_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_x))) svint8_t svcnot_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_x))) svint32_t svcnot_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_x))) svint64_t svcnot_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_x))) svint16_t svcnot_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_z))) svuint8_t svcnot_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_z))) svuint32_t svcnot_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_z))) svuint64_t svcnot_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_z))) svuint16_t svcnot_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_z))) svint8_t svcnot_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_z))) svint32_t svcnot_s32_z(svbool_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_z))) svint64_t svcnot_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_z))) svint16_t svcnot_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_m))) svuint8_t svcnt_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_m))) svuint32_t svcnt_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_m))) svuint64_t svcnt_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_m))) svuint16_t svcnt_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_m))) svuint8_t svcnt_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_m))) svuint64_t svcnt_f64_m(svuint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_m))) svuint32_t svcnt_f32_m(svuint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_m))) svuint16_t svcnt_f16_m(svuint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_m))) svuint32_t svcnt_s32_m(svuint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_m))) svuint64_t svcnt_s64_m(svuint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_m))) svuint16_t svcnt_s16_m(svuint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_x))) svuint8_t svcnt_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_x))) svuint32_t svcnt_u32_x(svbool_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_x))) svuint64_t svcnt_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_x))) svuint16_t svcnt_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_x))) svuint8_t svcnt_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_x))) svuint64_t svcnt_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_x))) svuint32_t svcnt_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_x))) svuint16_t svcnt_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_x))) svuint32_t svcnt_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_x))) svuint64_t svcnt_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_x))) svuint16_t svcnt_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_z))) svuint8_t svcnt_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_z))) svuint32_t svcnt_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_z))) svuint64_t svcnt_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_z))) svuint16_t svcnt_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_z))) svuint8_t svcnt_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_z))) svuint64_t svcnt_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_z))) svuint32_t svcnt_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_z))) 
svuint16_t svcnt_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_z))) svuint32_t svcnt_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_z))) svuint64_t svcnt_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_z))) svuint16_t svcnt_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntb))) uint64_t svcntb(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntb_pat))) uint64_t svcntb_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntd))) uint64_t svcntd(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntd_pat))) uint64_t svcntd_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnth))) uint64_t svcnth(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnth_pat))) uint64_t svcnth_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b8))) uint64_t svcntp_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b32))) uint64_t svcntp_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b64))) uint64_t svcntp_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_b16))) uint64_t svcntp_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntw))) uint64_t svcntw(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntw_pat))) uint64_t svcntw_pat(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) svuint32_t svcompact_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) svuint64_t svcompact_u64(svbool_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) svfloat64_t svcompact_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) svfloat32_t svcompact_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) svint32_t svcompact_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) svint64_t svcompact_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u8))) svuint8x2_t svcreate2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u32))) svuint32x2_t svcreate2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u64))) svuint64x2_t svcreate2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u16))) svuint16x2_t svcreate2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s8))) svint8x2_t svcreate2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f64))) svfloat64x2_t svcreate2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f32))) svfloat32x2_t svcreate2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f16))) svfloat16x2_t svcreate2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s32))) svint32x2_t svcreate2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s64))) svint64x2_t svcreate2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s16))) svint16x2_t svcreate2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u8))) svuint8x3_t 
svcreate3_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u32))) svuint32x3_t svcreate3_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u64))) svuint64x3_t svcreate3_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u16))) svuint16x3_t svcreate3_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s8))) svint8x3_t svcreate3_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f64))) svfloat64x3_t svcreate3_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f32))) svfloat32x3_t svcreate3_f32(svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f16))) svfloat16x3_t svcreate3_f16(svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s32))) svint32x3_t svcreate3_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s64))) svint64x3_t svcreate3_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s16))) svint16x3_t svcreate3_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u8))) svuint8x4_t svcreate4_u8(svuint8_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u32))) svuint32x4_t svcreate4_u32(svuint32_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u64))) svuint64x4_t svcreate4_u64(svuint64_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u16))) svuint16x4_t 
svcreate4_u16(svuint16_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s8))) svint8x4_t svcreate4_s8(svint8_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f64))) svfloat64x4_t svcreate4_f64(svfloat64_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f32))) svfloat32x4_t svcreate4_f32(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f16))) svfloat16x4_t svcreate4_f16(svfloat16_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s32))) svint32x4_t svcreate4_s32(svint32_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s64))) svint64x4_t svcreate4_s64(svint64_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s16))) svint16x4_t svcreate4_s16(svint16_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_m))) svfloat16_t svcvt_f16_f32_m(svfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x))) svfloat16_t svcvt_f16_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_z))) svfloat16_t svcvt_f16_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_m))) svfloat16_t svcvt_f16_f64_m(svfloat16_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_x))) svfloat16_t svcvt_f16_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_z))) svfloat16_t svcvt_f16_f64_z(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_m))) svfloat16_t svcvt_f16_s16_m(svfloat16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_x))) svfloat16_t svcvt_f16_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_z))) svfloat16_t svcvt_f16_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_m))) svfloat16_t svcvt_f16_s32_m(svfloat16_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_x))) svfloat16_t svcvt_f16_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_z))) svfloat16_t svcvt_f16_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_m))) svfloat16_t svcvt_f16_s64_m(svfloat16_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_x))) svfloat16_t svcvt_f16_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_z))) svfloat16_t svcvt_f16_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_m))) svfloat16_t svcvt_f16_u16_m(svfloat16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_x))) svfloat16_t svcvt_f16_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_z))) svfloat16_t svcvt_f16_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_m))) svfloat16_t svcvt_f16_u32_m(svfloat16_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_x))) svfloat16_t svcvt_f16_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_z))) svfloat16_t svcvt_f16_u32_z(svbool_t, 
svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_m))) svfloat16_t svcvt_f16_u64_m(svfloat16_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_x))) svfloat16_t svcvt_f16_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_z))) svfloat16_t svcvt_f16_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_m))) svfloat32_t svcvt_f32_f16_m(svfloat32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_x))) svfloat32_t svcvt_f32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_z))) svfloat32_t svcvt_f32_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_m))) svfloat32_t svcvt_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_x))) svfloat32_t svcvt_f32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_z))) svfloat32_t svcvt_f32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_m))) svfloat32_t svcvt_f32_s32_m(svfloat32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x))) svfloat32_t svcvt_f32_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_z))) svfloat32_t svcvt_f32_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_m))) svfloat32_t svcvt_f32_s64_m(svfloat32_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_x))) svfloat32_t svcvt_f32_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_z))) svfloat32_t 
svcvt_f32_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_m))) svfloat32_t svcvt_f32_u32_m(svfloat32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x))) svfloat32_t svcvt_f32_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_z))) svfloat32_t svcvt_f32_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_m))) svfloat32_t svcvt_f32_u64_m(svfloat32_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_x))) svfloat32_t svcvt_f32_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_z))) svfloat32_t svcvt_f32_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_m))) svfloat64_t svcvt_f64_f16_m(svfloat64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_x))) svfloat64_t svcvt_f64_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_z))) svfloat64_t svcvt_f64_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_m))) svfloat64_t svcvt_f64_f32_m(svfloat64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_x))) svfloat64_t svcvt_f64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_z))) svfloat64_t svcvt_f64_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_m))) svfloat64_t svcvt_f64_s32_m(svfloat64_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_x))) svfloat64_t svcvt_f64_s32_x(svbool_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_z))) svfloat64_t svcvt_f64_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_m))) svfloat64_t svcvt_f64_s64_m(svfloat64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_x))) svfloat64_t svcvt_f64_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_z))) svfloat64_t svcvt_f64_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_m))) svfloat64_t svcvt_f64_u32_m(svfloat64_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_x))) svfloat64_t svcvt_f64_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_z))) svfloat64_t svcvt_f64_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_m))) svfloat64_t svcvt_f64_u64_m(svfloat64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_x))) svfloat64_t svcvt_f64_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_z))) svfloat64_t svcvt_f64_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_m))) svint16_t svcvt_s16_f16_m(svint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_x))) svint16_t svcvt_s16_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_z))) svint16_t svcvt_s16_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_m))) svint32_t svcvt_s32_f16_m(svint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_x))) svint32_t svcvt_s32_f16_x(svbool_t, svfloat16_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_z))) svint32_t svcvt_s32_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_m))) svint32_t svcvt_s32_f32_m(svint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x))) svint32_t svcvt_s32_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_z))) svint32_t svcvt_s32_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_m))) svint32_t svcvt_s32_f64_m(svint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_x))) svint32_t svcvt_s32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_z))) svint32_t svcvt_s32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_m))) svint64_t svcvt_s64_f16_m(svint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_x))) svint64_t svcvt_s64_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_z))) svint64_t svcvt_s64_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_m))) svint64_t svcvt_s64_f32_m(svint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_x))) svint64_t svcvt_s64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_z))) svint64_t svcvt_s64_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_m))) svint64_t svcvt_s64_f64_m(svint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_x))) svint64_t svcvt_s64_f64_x(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_z))) svint64_t svcvt_s64_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_m))) svuint16_t svcvt_u16_f16_m(svuint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_x))) svuint16_t svcvt_u16_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_z))) svuint16_t svcvt_u16_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_m))) svuint32_t svcvt_u32_f16_m(svuint32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_x))) svuint32_t svcvt_u32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_z))) svuint32_t svcvt_u32_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_m))) svuint32_t svcvt_u32_f32_m(svuint32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x))) svuint32_t svcvt_u32_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_z))) svuint32_t svcvt_u32_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_m))) svuint32_t svcvt_u32_f64_m(svuint32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_x))) svuint32_t svcvt_u32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_z))) svuint32_t svcvt_u32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_m))) svuint64_t svcvt_u64_f16_m(svuint64_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_x))) svuint64_t svcvt_u64_f16_x(svbool_t, 
svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_z))) svuint64_t svcvt_u64_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_m))) svuint64_t svcvt_u64_f32_m(svuint64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_x))) svuint64_t svcvt_u64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_z))) svuint64_t svcvt_u64_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_m))) svuint64_t svcvt_u64_f64_m(svuint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_x))) svuint64_t svcvt_u64_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_z))) svuint64_t svcvt_u64_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_m))) svfloat64_t svdiv_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_m))) svfloat32_t svdiv_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_m))) svfloat16_t svdiv_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_x))) svfloat64_t svdiv_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_x))) svfloat32_t svdiv_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_x))) svfloat16_t svdiv_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_z))) svfloat64_t svdiv_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_z))) 
svfloat32_t svdiv_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_z))) svfloat16_t svdiv_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_m))) svint32_t svdiv_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_m))) svint64_t svdiv_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_x))) svint32_t svdiv_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_x))) svint64_t svdiv_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_z))) svint32_t svdiv_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_z))) svint64_t svdiv_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_m))) svuint32_t svdiv_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_m))) svuint64_t svdiv_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_x))) svuint32_t svdiv_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_x))) svuint64_t svdiv_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_z))) svuint32_t svdiv_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_z))) svuint64_t svdiv_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_m))) svfloat64_t svdiv_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_m))) svfloat32_t svdiv_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_m))) svfloat16_t svdiv_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_x))) svfloat64_t svdiv_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_x))) svfloat32_t svdiv_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_x))) svfloat16_t svdiv_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_z))) svfloat64_t svdiv_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_z))) svfloat32_t svdiv_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_z))) svfloat16_t svdiv_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_m))) svint32_t svdiv_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_m))) svint64_t svdiv_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_x))) svint32_t svdiv_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_x))) svint64_t svdiv_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_z))) svint32_t svdiv_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_z))) svint64_t svdiv_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_m))) svuint32_t svdiv_u32_m(svbool_t, svuint32_t, svuint32_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_m))) svuint64_t svdiv_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_x))) svuint32_t svdiv_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_x))) svuint64_t svdiv_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_z))) svuint32_t svdiv_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_z))) svuint64_t svdiv_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_m))) svfloat64_t svdivr_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_m))) svfloat32_t svdivr_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_m))) svfloat16_t svdivr_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_x))) svfloat64_t svdivr_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_x))) svfloat32_t svdivr_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_x))) svfloat16_t svdivr_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_z))) svfloat64_t svdivr_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_z))) svfloat32_t svdivr_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_z))) svfloat16_t svdivr_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_m))) svint32_t svdivr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_m))) svint64_t svdivr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_x))) svint32_t svdivr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_x))) svint64_t svdivr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_z))) svint32_t svdivr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_z))) svint64_t svdivr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_m))) svuint32_t svdivr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_m))) svuint64_t svdivr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_x))) svuint32_t svdivr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_x))) svuint64_t svdivr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_z))) svuint32_t svdivr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_z))) svuint64_t svdivr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_m))) svfloat64_t svdivr_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_m))) svfloat32_t svdivr_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_m))) svfloat16_t 
svdivr_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_x))) svfloat64_t svdivr_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_x))) svfloat32_t svdivr_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_x))) svfloat16_t svdivr_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_z))) svfloat64_t svdivr_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_z))) svfloat32_t svdivr_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_z))) svfloat16_t svdivr_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_m))) svint32_t svdivr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_m))) svint64_t svdivr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_x))) svint32_t svdivr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_x))) svint64_t svdivr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_z))) svint32_t svdivr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_z))) svint64_t svdivr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_m))) svuint32_t svdivr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_m))) svuint64_t svdivr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_x))) svuint32_t svdivr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_x))) svuint64_t svdivr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_z))) svuint32_t svdivr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_z))) svuint64_t svdivr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s32))) svint32_t svdot_n_s32(svint32_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s64))) svint64_t svdot_n_s64(svint64_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u32))) svuint32_t svdot_n_u32(svuint32_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u64))) svuint64_t svdot_n_u64(svuint64_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32))) svint32_t svdot_s32(svint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s64))) svint64_t svdot_s64(svint64_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32))) svuint32_t svdot_u32(svuint32_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u64))) svuint64_t svdot_u64(svuint64_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32))) svint32_t svdot_lane_s32(svint32_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s64))) svint64_t svdot_lane_s64(svint64_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32))) svuint32_t svdot_lane_u32(svuint32_t, svuint8_t, svuint8_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u64))) svuint64_t svdot_lane_u64(svuint64_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8))) svuint8_t svdup_n_u8(uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32))) svuint32_t svdup_n_u32(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64))) svuint64_t svdup_n_u64(uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16))) svuint16_t svdup_n_u16(uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8))) svint8_t svdup_n_s8(int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64))) svfloat64_t svdup_n_f64(float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32))) svfloat32_t svdup_n_f32(float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16))) svfloat16_t svdup_n_f16(float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32))) svint32_t svdup_n_s32(int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64))) svint64_t svdup_n_s64(int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16))) svint16_t svdup_n_s16(int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_m))) svuint8_t svdup_n_u8_m(svuint8_t, svbool_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_m))) svuint32_t svdup_n_u32_m(svuint32_t, svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_m))) svuint64_t svdup_n_u64_m(svuint64_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_m))) svuint16_t svdup_n_u16_m(svuint16_t, svbool_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_m))) svint8_t svdup_n_s8_m(svint8_t, 
svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_m))) svfloat64_t svdup_n_f64_m(svfloat64_t, svbool_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_m))) svfloat32_t svdup_n_f32_m(svfloat32_t, svbool_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_m))) svfloat16_t svdup_n_f16_m(svfloat16_t, svbool_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_m))) svint32_t svdup_n_s32_m(svint32_t, svbool_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_m))) svint64_t svdup_n_s64_m(svint64_t, svbool_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_m))) svint16_t svdup_n_s16_m(svint16_t, svbool_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b8))) svbool_t svdup_n_b8(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b32))) svbool_t svdup_n_b32(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b64))) svbool_t svdup_n_b64(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b16))) svbool_t svdup_n_b16(bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_x))) svuint8_t svdup_n_u8_x(svbool_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_x))) svuint32_t svdup_n_u32_x(svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_x))) svuint64_t svdup_n_u64_x(svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_x))) svuint16_t svdup_n_u16_x(svbool_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_x))) svint8_t svdup_n_s8_x(svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_x))) svfloat64_t svdup_n_f64_x(svbool_t, float64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_x))) svfloat32_t svdup_n_f32_x(svbool_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_x))) svfloat16_t svdup_n_f16_x(svbool_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_x))) svint32_t svdup_n_s32_x(svbool_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_x))) svint64_t svdup_n_s64_x(svbool_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_x))) svint16_t svdup_n_s16_x(svbool_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_z))) svuint8_t svdup_n_u8_z(svbool_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_z))) svuint32_t svdup_n_u32_z(svbool_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_z))) svuint64_t svdup_n_u64_z(svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_z))) svuint16_t svdup_n_u16_z(svbool_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_z))) svint8_t svdup_n_s8_z(svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_z))) svfloat64_t svdup_n_f64_z(svbool_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_z))) svfloat32_t svdup_n_f32_z(svbool_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_z))) svfloat16_t svdup_n_f16_z(svbool_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_z))) svint32_t svdup_n_s32_z(svbool_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_z))) svint64_t svdup_n_s64_z(svbool_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_z))) svint16_t svdup_n_s16_z(svbool_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u8))) svuint8_t svdup_lane_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u32))) svuint32_t svdup_lane_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u64))) svuint64_t svdup_lane_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u16))) svuint16_t svdup_lane_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s8))) svint8_t svdup_lane_s8(svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f64))) svfloat64_t svdup_lane_f64(svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f32))) svfloat32_t svdup_lane_f32(svfloat32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f16))) svfloat16_t svdup_lane_f16(svfloat16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s32))) svint32_t svdup_lane_s32(svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s64))) svint64_t svdup_lane_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s16))) svint16_t svdup_lane_s16(svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u8))) svuint8_t svdupq_n_u8(uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s8))) svint8_t svdupq_n_s8(int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u16))) svuint16_t svdupq_n_u16(uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, 
uint16_t, uint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f16))) svfloat16_t svdupq_n_f16(float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s16))) svint16_t svdupq_n_s16(int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u32))) svuint32_t svdupq_n_u32(uint32_t, uint32_t, uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f32))) svfloat32_t svdupq_n_f32(float32_t, float32_t, float32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s32))) svint32_t svdupq_n_s32(int32_t, int32_t, int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u64))) svuint64_t svdupq_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f64))) svfloat64_t svdupq_n_f64(float64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s64))) svint64_t svdupq_n_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b8))) svbool_t svdupq_n_b8(bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b16))) svbool_t svdupq_n_b16(bool, bool, bool, bool, bool, bool, bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b32))) svbool_t svdupq_n_b32(bool, bool, bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b64))) svbool_t svdupq_n_b64(bool, bool); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u8))) svuint8_t svdupq_lane_u8(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u32))) svuint32_t svdupq_lane_u32(svuint32_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u64))) svuint64_t svdupq_lane_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u16))) svuint16_t svdupq_lane_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s8))) svint8_t svdupq_lane_s8(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f64))) svfloat64_t svdupq_lane_f64(svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f32))) svfloat32_t svdupq_lane_f32(svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f16))) svfloat16_t svdupq_lane_f16(svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s32))) svint32_t svdupq_lane_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s64))) svint64_t svdupq_lane_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s16))) svint16_t svdupq_lane_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_b_z))) svbool_t sveor_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_m))) svuint8_t sveor_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_m))) svuint32_t sveor_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_m))) svuint64_t sveor_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_m))) svuint16_t sveor_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_m))) svint8_t sveor_n_s8_m(svbool_t, svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_m))) svint32_t sveor_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_m))) svint64_t sveor_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_m))) svint16_t sveor_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_x))) svuint8_t sveor_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_x))) svuint32_t sveor_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_x))) svuint64_t sveor_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_x))) svuint16_t sveor_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_x))) svint8_t sveor_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_x))) svint32_t sveor_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_x))) svint64_t sveor_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_x))) svint16_t sveor_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_z))) svuint8_t sveor_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_z))) svuint32_t sveor_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_z))) svuint64_t sveor_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_z))) svuint16_t sveor_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_z))) svint8_t sveor_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_z))) svint32_t sveor_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_z))) svint64_t sveor_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_z))) svint16_t sveor_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_m))) svuint8_t sveor_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_m))) svuint32_t sveor_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_m))) svuint64_t sveor_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_m))) svuint16_t sveor_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_m))) svint8_t sveor_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_m))) svint32_t sveor_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_m))) svint64_t sveor_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_m))) svint16_t sveor_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_x))) svuint8_t sveor_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_x))) svuint32_t sveor_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_x))) svuint64_t sveor_u64_x(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_x))) svuint16_t sveor_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_x))) svint8_t sveor_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_x))) svint32_t sveor_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_x))) svint64_t sveor_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_x))) svint16_t sveor_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_z))) svuint8_t sveor_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_z))) svuint32_t sveor_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_z))) svuint64_t sveor_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_z))) svuint16_t sveor_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_z))) svint8_t sveor_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_z))) svint32_t sveor_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_z))) svint64_t sveor_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_z))) svint16_t sveor_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u8))) uint8_t sveorv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u32))) uint32_t sveorv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u64))) uint64_t 
sveorv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u16))) uint16_t sveorv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s8))) int8_t sveorv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s32))) int32_t sveorv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s64))) int64_t sveorv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s16))) int16_t sveorv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) svfloat64_t svexpa_f64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) svfloat32_t svexpa_f32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) svfloat16_t svexpa_f16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u8))) svuint8_t svext_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u32))) svuint32_t svext_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u64))) svuint64_t svext_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u16))) svuint16_t svext_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s8))) svint8_t svext_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f64))) svfloat64_t svext_f64(svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f32))) svfloat32_t svext_f32(svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f16))) svfloat16_t svext_f16(svfloat16_t, svfloat16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s32))) svint32_t svext_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s64))) svint64_t svext_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s16))) svint16_t svext_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_m))) svint32_t svextb_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_m))) svint64_t svextb_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_m))) svint16_t svextb_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_x))) svint32_t svextb_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_x))) svint64_t svextb_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_x))) svint16_t svextb_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_z))) svint32_t svextb_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_z))) svint64_t svextb_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_z))) svint16_t svextb_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_m))) svuint32_t svextb_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_m))) svuint64_t svextb_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_m))) svuint16_t svextb_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_x))) svuint32_t 
svextb_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_x))) svuint64_t svextb_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_x))) svuint16_t svextb_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_z))) svuint32_t svextb_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_z))) svuint64_t svextb_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_z))) svuint16_t svextb_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_m))) svint32_t svexth_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_m))) svint64_t svexth_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_x))) svint32_t svexth_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_x))) svint64_t svexth_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_z))) svint32_t svexth_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_z))) svint64_t svexth_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_m))) svuint32_t svexth_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_m))) svuint64_t svexth_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_x))) svuint32_t svexth_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_x))) svuint64_t svexth_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_z))) 
svuint32_t svexth_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_z))) svuint64_t svexth_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_m))) svint64_t svextw_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_x))) svint64_t svextw_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_z))) svint64_t svextw_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_m))) svuint64_t svextw_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_x))) svuint64_t svextw_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_z))) svuint64_t svextw_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u8))) svuint8_t svget2_u8(svuint8x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u32))) svuint32_t svget2_u32(svuint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u64))) svuint64_t svget2_u64(svuint64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u16))) svuint16_t svget2_u16(svuint16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s8))) svint8_t svget2_s8(svint8x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f64))) svfloat64_t svget2_f64(svfloat64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f32))) svfloat32_t svget2_f32(svfloat32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f16))) svfloat16_t svget2_f16(svfloat16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s32))) svint32_t svget2_s32(svint32x2_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s64))) svint64_t svget2_s64(svint64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s16))) svint16_t svget2_s16(svint16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u8))) svuint8_t svget3_u8(svuint8x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u32))) svuint32_t svget3_u32(svuint32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u64))) svuint64_t svget3_u64(svuint64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u16))) svuint16_t svget3_u16(svuint16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s8))) svint8_t svget3_s8(svint8x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f64))) svfloat64_t svget3_f64(svfloat64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f32))) svfloat32_t svget3_f32(svfloat32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f16))) svfloat16_t svget3_f16(svfloat16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s32))) svint32_t svget3_s32(svint32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s64))) svint64_t svget3_s64(svint64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s16))) svint16_t svget3_s16(svint16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u8))) svuint8_t svget4_u8(svuint8x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u32))) svuint32_t svget4_u32(svuint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u64))) svuint64_t svget4_u64(svuint64x4_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u16))) svuint16_t svget4_u16(svuint16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s8))) svint8_t svget4_s8(svint8x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f64))) svfloat64_t svget4_f64(svfloat64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f32))) svfloat32_t svget4_f32(svfloat32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f16))) svfloat16_t svget4_f16(svfloat16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s32))) svint32_t svget4_s32(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s64))) svint64_t svget4_s64(svint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s16))) svint16_t svget4_s16(svint16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u8))) svuint8_t svindex_u8(uint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u32))) svuint32_t svindex_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u64))) svuint64_t svindex_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u16))) svuint16_t svindex_u16(uint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s8))) svint8_t svindex_s8(int8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s32))) svint32_t svindex_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s64))) svint64_t svindex_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_s16))) svint16_t svindex_s16(int16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u8))) svuint8_t svinsr_n_u8(svuint8_t, 
uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u32))) svuint32_t svinsr_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u64))) svuint64_t svinsr_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u16))) svuint16_t svinsr_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s8))) svint8_t svinsr_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f64))) svfloat64_t svinsr_n_f64(svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f32))) svfloat32_t svinsr_n_f32(svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f16))) svfloat16_t svinsr_n_f16(svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s32))) svint32_t svinsr_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s64))) svint64_t svinsr_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s16))) svint16_t svinsr_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u8))) uint8_t svlasta_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u32))) uint32_t svlasta_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u64))) uint64_t svlasta_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u16))) uint16_t svlasta_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s8))) int8_t svlasta_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f64))) float64_t svlasta_f64(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f32))) float32_t svlasta_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f16))) float16_t svlasta_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s32))) int32_t svlasta_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s64))) int64_t svlasta_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s16))) int16_t svlasta_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u8))) uint8_t svlastb_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u32))) uint32_t svlastb_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u64))) uint64_t svlastb_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u16))) uint16_t svlastb_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s8))) int8_t svlastb_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f64))) float64_t svlastb_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f32))) float32_t svlastb_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f16))) float16_t svlastb_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s32))) int32_t svlastb_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s64))) int64_t svlastb_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s16))) int16_t svlastb_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8))) svuint8_t svld1_u8(svbool_t, uint8_t 
const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32))) svuint32_t svld1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64))) svuint64_t svld1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16))) svuint16_t svld1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8))) svint8_t svld1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64))) svfloat64_t svld1_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32))) svfloat32_t svld1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16))) svfloat16_t svld1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32))) svint32_t svld1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64))) svint64_t svld1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16))) svint16_t svld1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) svuint64_t svld1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_f64))) svfloat64_t svld1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_f32))) svfloat32_t svld1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_s32))) svint32_t svld1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_s64))) svint64_t svld1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_u32))) svuint32_t svld1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_u64))) svuint64_t svld1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_f64))) svfloat64_t svld1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_f32))) svfloat32_t svld1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_s32))) svint32_t svld1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_s64))) svint64_t svld1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_u32))) svuint32_t svld1_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_u64))) svuint64_t svld1_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_f64))) svfloat64_t svld1_gather_u64base_f64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_f32))) svfloat32_t svld1_gather_u32base_f32(svbool_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_s32))) svint32_t svld1_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_s64))) svint64_t svld1_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_u32))) svuint32_t svld1_gather_s32index_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_f32))) svfloat32_t svld1_gather_s32index_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_s32))) svint32_t svld1_gather_s32index_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_u32))) svuint32_t svld1_gather_u32index_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_f32))) svfloat32_t svld1_gather_u32index_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_s32))) svint32_t svld1_gather_u32index_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_u64))) svuint64_t svld1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_f64))) svfloat64_t svld1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_s64))) svint64_t svld1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_u64))) svuint64_t svld1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_f64))) svfloat64_t svld1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_s64))) svint64_t svld1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_u32))) svuint32_t svld1_gather_s32offset_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_f32))) svfloat32_t svld1_gather_s32offset_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_s32))) svint32_t svld1_gather_s32offset_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_u32))) svuint32_t svld1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_f32))) svfloat32_t svld1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_s32))) svint32_t svld1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_u64))) svuint64_t svld1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_f64))) svfloat64_t svld1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_s64))) svint64_t svld1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_u64))) svuint64_t svld1_gather_u64offset_u64(svbool_t, uint64_t const *, 
svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_f64))) svfloat64_t svld1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_s64))) svint64_t svld1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8))) svuint8_t svld1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32))) svuint32_t svld1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64))) svuint64_t svld1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16))) svuint16_t svld1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8))) svint8_t svld1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64))) svfloat64_t svld1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32))) svfloat32_t svld1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16))) svfloat16_t svld1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32))) svint32_t svld1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64))) svint64_t svld1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16))) svint16_t svld1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u8))) svuint8_t 
svld1rq_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u32))) svuint32_t svld1rq_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u64))) svuint64_t svld1rq_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u16))) svuint16_t svld1rq_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s8))) svint8_t svld1rq_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f64))) svfloat64_t svld1rq_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f32))) svfloat32_t svld1rq_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f16))) svfloat16_t svld1rq_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s32))) svint32_t svld1rq_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s64))) svint64_t svld1rq_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s16))) svint16_t svld1rq_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_u32))) svuint32_t svld1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_u64))) svuint64_t svld1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_s32))) svint32_t svld1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_s64))) svint64_t svld1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_u32))) svuint32_t svld1sb_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_u64))) svuint64_t svld1sb_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_s32))) svint32_t svld1sb_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_s64))) svint64_t svld1sb_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_u32))) svuint32_t svld1sb_gather_s32offset_u32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_s32))) svint32_t svld1sb_gather_s32offset_s32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_u32))) svuint32_t svld1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_s32))) svint32_t svld1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_u64))) svuint64_t svld1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_s64))) svint64_t svld1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_u64))) svuint64_t svld1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_s64))) svint64_t svld1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u32))) svuint32_t svld1sb_vnum_u32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u64))) svuint64_t svld1sb_vnum_u64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u16))) svuint16_t svld1sb_vnum_u16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_s32))) svint32_t svld1sb_vnum_s32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_s64))) svint64_t svld1sb_vnum_s64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_s16))) svint16_t svld1sb_vnum_s16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_u32))) svuint32_t svld1sb_u32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_u64))) svuint64_t svld1sb_u64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_u16))) svuint16_t svld1sb_u16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_s32))) svint32_t svld1sb_s32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_s64))) svint64_t svld1sb_s64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_s16))) svint16_t svld1sb_s16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_u32))) svuint32_t svld1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_u64))) svuint64_t svld1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_s32))) svint32_t svld1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_s64))) svint64_t svld1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_u32))) svuint32_t svld1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_u64))) svuint64_t svld1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_s32))) svint32_t svld1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_s64))) svint64_t svld1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_u32))) svuint32_t svld1sh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_u64))) svuint64_t svld1sh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_s32))) svint32_t svld1sh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_s64))) svint64_t svld1sh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_u32))) svuint32_t svld1sh_gather_s32index_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_s32))) svint32_t svld1sh_gather_s32index_s32(svbool_t, int16_t const *, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_u32))) svuint32_t svld1sh_gather_u32index_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_s32))) svint32_t svld1sh_gather_u32index_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_u64))) svuint64_t svld1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_s64))) svint64_t svld1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_u64))) svuint64_t svld1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_s64))) svint64_t svld1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_u32))) svuint32_t svld1sh_gather_s32offset_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_s32))) svint32_t svld1sh_gather_s32offset_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_u32))) svuint32_t svld1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_s32))) svint32_t svld1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_u64))) svuint64_t svld1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_s64))) svint64_t 
svld1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_u64))) svuint64_t svld1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_s64))) svint64_t svld1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_u32))) svuint32_t svld1sh_vnum_u32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_u64))) svuint64_t svld1sh_vnum_u64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_s32))) svint32_t svld1sh_vnum_s32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_vnum_s64))) svint64_t svld1sh_vnum_s64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_u32))) svuint32_t svld1sh_u32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_u64))) svuint64_t svld1sh_u64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_s32))) svint32_t svld1sh_s32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_s64))) svint64_t svld1sh_s64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_u64))) svuint64_t svld1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_s64))) svint64_t svld1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_u64))) svuint64_t svld1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, 
int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_s64))) svint64_t svld1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_u64))) svuint64_t svld1sw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_s64))) svint64_t svld1sw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_u64))) svuint64_t svld1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_s64))) svint64_t svld1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_u64))) svuint64_t svld1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_s64))) svint64_t svld1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_u64))) svuint64_t svld1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_s64))) svint64_t svld1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_u64))) svuint64_t svld1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_s64))) svint64_t svld1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_vnum_u64))) svuint64_t svld1sw_vnum_u64(svbool_t, int32_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_vnum_s64))) svint64_t svld1sw_vnum_s64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_u64))) svuint64_t svld1sw_u64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_s64))) svint64_t svld1sw_s64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_u32))) svuint32_t svld1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_u64))) svuint64_t svld1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_s32))) svint32_t svld1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_s64))) svint64_t svld1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_u32))) svuint32_t svld1ub_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_u64))) svuint64_t svld1ub_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_s32))) svint32_t svld1ub_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_s64))) svint64_t svld1ub_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_u32))) svuint32_t svld1ub_gather_s32offset_u32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_s32))) svint32_t svld1ub_gather_s32offset_s32(svbool_t, 
uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_u32))) svuint32_t svld1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_s32))) svint32_t svld1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_u64))) svuint64_t svld1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_s64))) svint64_t svld1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_u64))) svuint64_t svld1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_s64))) svint64_t svld1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_u32))) svuint32_t svld1ub_vnum_u32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_u64))) svuint64_t svld1ub_vnum_u64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_u16))) svuint16_t svld1ub_vnum_u16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_s32))) svint32_t svld1ub_vnum_s32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_s64))) svint64_t svld1ub_vnum_s64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_vnum_s16))) svint16_t svld1ub_vnum_s16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_u32))) 
svuint32_t svld1ub_u32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_u64))) svuint64_t svld1ub_u64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_u16))) svuint16_t svld1ub_u16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_s32))) svint32_t svld1ub_s32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_s64))) svint64_t svld1ub_s64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_s16))) svint16_t svld1ub_s16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_u32))) svuint32_t svld1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_u64))) svuint64_t svld1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_s32))) svint32_t svld1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_s64))) svint64_t svld1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_u32))) svuint32_t svld1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_u64))) svuint64_t svld1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_s32))) svint32_t svld1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_s64))) svint64_t 
svld1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_u32))) svuint32_t svld1uh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_u64))) svuint64_t svld1uh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_s32))) svint32_t svld1uh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_s64))) svint64_t svld1uh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_u32))) svuint32_t svld1uh_gather_s32index_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_s32))) svint32_t svld1uh_gather_s32index_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_u32))) svuint32_t svld1uh_gather_u32index_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_s32))) svint32_t svld1uh_gather_u32index_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_u64))) svuint64_t svld1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_s64))) svint64_t svld1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_u64))) svuint64_t svld1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_s64))) svint64_t svld1uh_gather_u64index_s64(svbool_t, 
uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_u32))) svuint32_t svld1uh_gather_s32offset_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_s32))) svint32_t svld1uh_gather_s32offset_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_u32))) svuint32_t svld1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_s32))) svint32_t svld1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_u64))) svuint64_t svld1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_s64))) svint64_t svld1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_u64))) svuint64_t svld1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_s64))) svint64_t svld1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_u32))) svuint32_t svld1uh_vnum_u32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_u64))) svuint64_t svld1uh_vnum_u64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_s32))) svint32_t svld1uh_vnum_s32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_vnum_s64))) svint64_t svld1uh_vnum_s64(svbool_t, uint16_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_u32))) svuint32_t svld1uh_u32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_u64))) svuint64_t svld1uh_u64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_s32))) svint32_t svld1uh_s32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_s64))) svint64_t svld1uh_s64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_u64))) svuint64_t svld1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_s64))) svint64_t svld1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_u64))) svuint64_t svld1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_s64))) svint64_t svld1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_u64))) svuint64_t svld1uw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_s64))) svint64_t svld1uw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_u64))) svuint64_t svld1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_s64))) svint64_t svld1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_u64))) svuint64_t svld1uw_gather_u64index_u64(svbool_t, uint32_t const *, 
svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_s64))) svint64_t svld1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_u64))) svuint64_t svld1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_s64))) svint64_t svld1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_u64))) svuint64_t svld1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_s64))) svint64_t svld1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_vnum_u64))) svuint64_t svld1uw_vnum_u64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_vnum_s64))) svint64_t svld1uw_vnum_s64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_u64))) svuint64_t svld1uw_u64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_s64))) svint64_t svld1uw_s64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u8))) svuint8x2_t svld2_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u32))) svuint32x2_t svld2_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u64))) svuint64x2_t svld2_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u16))) svuint16x2_t svld2_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s8))) svint8x2_t 
svld2_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f64))) svfloat64x2_t svld2_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f32))) svfloat32x2_t svld2_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f16))) svfloat16x2_t svld2_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s32))) svint32x2_t svld2_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s64))) svint64x2_t svld2_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s16))) svint16x2_t svld2_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u8))) svuint8x2_t svld2_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u32))) svuint32x2_t svld2_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u64))) svuint64x2_t svld2_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u16))) svuint16x2_t svld2_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s8))) svint8x2_t svld2_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f64))) svfloat64x2_t svld2_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f32))) svfloat32x2_t svld2_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f16))) svfloat16x2_t svld2_vnum_f16(svbool_t, float16_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s32))) svint32x2_t svld2_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s64))) svint64x2_t svld2_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s16))) svint16x2_t svld2_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u8))) svuint8x3_t svld3_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u32))) svuint32x3_t svld3_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u64))) svuint64x3_t svld3_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u16))) svuint16x3_t svld3_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s8))) svint8x3_t svld3_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f64))) svfloat64x3_t svld3_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f32))) svfloat32x3_t svld3_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f16))) svfloat16x3_t svld3_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s32))) svint32x3_t svld3_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s64))) svint64x3_t svld3_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s16))) svint16x3_t svld3_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u8))) svuint8x3_t svld3_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u32))) svuint32x3_t svld3_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u64))) svuint64x3_t svld3_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u16))) svuint16x3_t svld3_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s8))) svint8x3_t svld3_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f64))) svfloat64x3_t svld3_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f32))) svfloat32x3_t svld3_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f16))) svfloat16x3_t svld3_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s32))) svint32x3_t svld3_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s64))) svint64x3_t svld3_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s16))) svint16x3_t svld3_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u8))) svuint8x4_t svld4_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u32))) svuint32x4_t svld4_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u64))) svuint64x4_t svld4_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u16))) svuint16x4_t svld4_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s8))) 
svint8x4_t svld4_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f64))) svfloat64x4_t svld4_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f32))) svfloat32x4_t svld4_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f16))) svfloat16x4_t svld4_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s32))) svint32x4_t svld4_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s64))) svint64x4_t svld4_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s16))) svint16x4_t svld4_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u8))) svuint8x4_t svld4_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u32))) svuint32x4_t svld4_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u64))) svuint64x4_t svld4_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u16))) svuint16x4_t svld4_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s8))) svint8x4_t svld4_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f64))) svfloat64x4_t svld4_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f32))) svfloat32x4_t svld4_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f16))) svfloat16x4_t svld4_vnum_f16(svbool_t, float16_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s32))) svint32x4_t svld4_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s64))) svint64x4_t svld4_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s16))) svint16x4_t svld4_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u8))) svuint8_t svldff1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u32))) svuint32_t svldff1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u64))) svuint64_t svldff1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u16))) svuint16_t svldff1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s8))) svint8_t svldff1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f64))) svfloat64_t svldff1_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f32))) svfloat32_t svldff1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f16))) svfloat16_t svldff1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s32))) svint32_t svldff1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s64))) svint64_t svldff1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s16))) svint16_t svldff1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_u32))) svuint32_t svldff1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_u64))) svuint64_t svldff1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_f64))) svfloat64_t svldff1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_f32))) svfloat32_t svldff1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_s32))) svint32_t svldff1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_s64))) svint64_t svldff1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_u32))) svuint32_t svldff1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_u64))) svuint64_t svldff1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_f64))) svfloat64_t svldff1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_f32))) svfloat32_t svldff1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_s32))) svint32_t svldff1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_s64))) svint64_t svldff1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_u32))) svuint32_t 
svldff1_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_u64))) svuint64_t svldff1_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_f64))) svfloat64_t svldff1_gather_u64base_f64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_f32))) svfloat32_t svldff1_gather_u32base_f32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_s32))) svint32_t svldff1_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_s64))) svint64_t svldff1_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_u32))) svuint32_t svldff1_gather_s32index_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_f32))) svfloat32_t svldff1_gather_s32index_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_s32))) svint32_t svldff1_gather_s32index_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_u32))) svuint32_t svldff1_gather_u32index_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_f32))) svfloat32_t svldff1_gather_u32index_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_s32))) svint32_t svldff1_gather_u32index_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_u64))) svuint64_t svldff1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_f64))) svfloat64_t svldff1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_s64))) svint64_t svldff1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_u64))) svuint64_t svldff1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_f64))) svfloat64_t svldff1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_s64))) svint64_t svldff1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_u32))) svuint32_t svldff1_gather_s32offset_u32(svbool_t, uint32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_f32))) svfloat32_t svldff1_gather_s32offset_f32(svbool_t, float32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_s32))) svint32_t svldff1_gather_s32offset_s32(svbool_t, int32_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_u32))) svuint32_t svldff1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_f32))) svfloat32_t svldff1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_s32))) svint32_t svldff1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_u64))) svuint64_t 
svldff1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_f64))) svfloat64_t svldff1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_s64))) svint64_t svldff1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_u64))) svuint64_t svldff1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_f64))) svfloat64_t svldff1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_s64))) svint64_t svldff1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u8))) svuint8_t svldff1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u32))) svuint32_t svldff1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u64))) svuint64_t svldff1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u16))) svuint16_t svldff1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s8))) svint8_t svldff1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f64))) svfloat64_t svldff1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f32))) svfloat32_t svldff1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f16))) svfloat16_t svldff1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s32))) svint32_t svldff1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s64))) svint64_t svldff1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s16))) svint16_t svldff1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_u32))) svuint32_t svldff1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_u64))) svuint64_t svldff1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_s32))) svint32_t svldff1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_s64))) svint64_t svldff1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_u32))) svuint32_t svldff1sb_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_u64))) svuint64_t svldff1sb_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_s32))) svint32_t svldff1sb_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_s64))) svint64_t svldff1sb_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_u32))) 
svuint32_t svldff1sb_gather_s32offset_u32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_s32))) svint32_t svldff1sb_gather_s32offset_s32(svbool_t, int8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_u32))) svuint32_t svldff1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_s32))) svint32_t svldff1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_u64))) svuint64_t svldff1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_s64))) svint64_t svldff1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_u64))) svuint64_t svldff1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_s64))) svint64_t svldff1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_u32))) svuint32_t svldff1sb_vnum_u32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_u64))) svuint64_t svldff1sb_vnum_u64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_u16))) svuint16_t svldff1sb_vnum_u16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_s32))) svint32_t svldff1sb_vnum_s32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_s64))) svint64_t 
svldff1sb_vnum_s64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_vnum_s16))) svint16_t svldff1sb_vnum_s16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_u32))) svuint32_t svldff1sb_u32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_u64))) svuint64_t svldff1sb_u64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_u16))) svuint16_t svldff1sb_u16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_s32))) svint32_t svldff1sb_s32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_s64))) svint64_t svldff1sb_s64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_s16))) svint16_t svldff1sb_s16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_u32))) svuint32_t svldff1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_u64))) svuint64_t svldff1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_s32))) svint32_t svldff1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_s64))) svint64_t svldff1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_u32))) svuint32_t svldff1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_u64))) svuint64_t 
svldff1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_s32))) svint32_t svldff1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_s64))) svint64_t svldff1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_u32))) svuint32_t svldff1sh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_u64))) svuint64_t svldff1sh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_s32))) svint32_t svldff1sh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_s64))) svint64_t svldff1sh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_u32))) svuint32_t svldff1sh_gather_s32index_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_s32))) svint32_t svldff1sh_gather_s32index_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_u32))) svuint32_t svldff1sh_gather_u32index_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_s32))) svint32_t svldff1sh_gather_u32index_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_u64))) svuint64_t svldff1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_s64))) 
svint64_t svldff1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_u64))) svuint64_t svldff1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_s64))) svint64_t svldff1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_u32))) svuint32_t svldff1sh_gather_s32offset_u32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_s32))) svint32_t svldff1sh_gather_s32offset_s32(svbool_t, int16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_u32))) svuint32_t svldff1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_s32))) svint32_t svldff1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_u64))) svuint64_t svldff1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_s64))) svint64_t svldff1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_u64))) svuint64_t svldff1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_s64))) svint64_t svldff1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_u32))) svuint32_t svldff1sh_vnum_u32(svbool_t, int16_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_u64))) svuint64_t svldff1sh_vnum_u64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_s32))) svint32_t svldff1sh_vnum_s32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_vnum_s64))) svint64_t svldff1sh_vnum_s64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_u32))) svuint32_t svldff1sh_u32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_u64))) svuint64_t svldff1sh_u64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_s32))) svint32_t svldff1sh_s32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_s64))) svint64_t svldff1sh_s64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_u64))) svuint64_t svldff1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_s64))) svint64_t svldff1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_u64))) svuint64_t svldff1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_s64))) svint64_t svldff1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_u64))) svuint64_t svldff1sw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_s64))) svint64_t svldff1sw_gather_u64base_s64(svbool_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_u64))) svuint64_t svldff1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_s64))) svint64_t svldff1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_u64))) svuint64_t svldff1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_s64))) svint64_t svldff1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_u64))) svuint64_t svldff1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_s64))) svint64_t svldff1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_u64))) svuint64_t svldff1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_s64))) svint64_t svldff1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_vnum_u64))) svuint64_t svldff1sw_vnum_u64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_vnum_s64))) svint64_t svldff1sw_vnum_s64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_u64))) svuint64_t svldff1sw_u64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_s64))) svint64_t svldff1sw_s64(svbool_t, int32_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_u32))) svuint32_t svldff1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_u64))) svuint64_t svldff1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_s32))) svint32_t svldff1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_s64))) svint64_t svldff1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_u32))) svuint32_t svldff1ub_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_u64))) svuint64_t svldff1ub_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_s32))) svint32_t svldff1ub_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_s64))) svint64_t svldff1ub_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_u32))) svuint32_t svldff1ub_gather_s32offset_u32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_s32))) svint32_t svldff1ub_gather_s32offset_s32(svbool_t, uint8_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_u32))) svuint32_t svldff1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_s32))) svint32_t svldff1ub_gather_u32offset_s32(svbool_t, 
uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_u64))) svuint64_t svldff1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_s64))) svint64_t svldff1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_u64))) svuint64_t svldff1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_s64))) svint64_t svldff1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_u32))) svuint32_t svldff1ub_vnum_u32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_u64))) svuint64_t svldff1ub_vnum_u64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_u16))) svuint16_t svldff1ub_vnum_u16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_s32))) svint32_t svldff1ub_vnum_s32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_s64))) svint64_t svldff1ub_vnum_s64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_vnum_s16))) svint16_t svldff1ub_vnum_s16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_u32))) svuint32_t svldff1ub_u32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_u64))) svuint64_t svldff1ub_u64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_u16))) svuint16_t svldff1ub_u16(svbool_t, 
uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_s32))) svint32_t svldff1ub_s32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_s64))) svint64_t svldff1ub_s64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_s16))) svint16_t svldff1ub_s16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_u32))) svuint32_t svldff1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_u64))) svuint64_t svldff1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_s32))) svint32_t svldff1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_s64))) svint64_t svldff1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_u32))) svuint32_t svldff1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_u64))) svuint64_t svldff1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_s32))) svint32_t svldff1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_s64))) svint64_t svldff1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_u32))) svuint32_t svldff1uh_gather_u32base_u32(svbool_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_u64))) svuint64_t svldff1uh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_s32))) svint32_t svldff1uh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_s64))) svint64_t svldff1uh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_u32))) svuint32_t svldff1uh_gather_s32index_u32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_s32))) svint32_t svldff1uh_gather_s32index_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_u32))) svuint32_t svldff1uh_gather_u32index_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_s32))) svint32_t svldff1uh_gather_u32index_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_u64))) svuint64_t svldff1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_s64))) svint64_t svldff1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_u64))) svuint64_t svldff1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_s64))) svint64_t svldff1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_u32))) svuint32_t svldff1uh_gather_s32offset_u32(svbool_t, 
uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_s32))) svint32_t svldff1uh_gather_s32offset_s32(svbool_t, uint16_t const *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_u32))) svuint32_t svldff1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_s32))) svint32_t svldff1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_u64))) svuint64_t svldff1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_s64))) svint64_t svldff1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_u64))) svuint64_t svldff1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_s64))) svint64_t svldff1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_u32))) svuint32_t svldff1uh_vnum_u32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_u64))) svuint64_t svldff1uh_vnum_u64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_s32))) svint32_t svldff1uh_vnum_s32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_vnum_s64))) svint64_t svldff1uh_vnum_s64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_u32))) svuint32_t svldff1uh_u32(svbool_t, uint16_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_u64))) svuint64_t svldff1uh_u64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_s32))) svint32_t svldff1uh_s32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_s64))) svint64_t svldff1uh_s64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_u64))) svuint64_t svldff1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_s64))) svint64_t svldff1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_u64))) svuint64_t svldff1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_s64))) svint64_t svldff1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_u64))) svuint64_t svldff1uw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_s64))) svint64_t svldff1uw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_u64))) svuint64_t svldff1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_s64))) svint64_t svldff1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_u64))) svuint64_t svldff1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_s64))) svint64_t svldff1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_u64))) svuint64_t svldff1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_s64))) svint64_t svldff1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_u64))) svuint64_t svldff1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_s64))) svint64_t svldff1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_vnum_u64))) svuint64_t svldff1uw_vnum_u64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_vnum_s64))) svint64_t svldff1uw_vnum_s64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_u64))) svuint64_t svldff1uw_u64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_s64))) svint64_t svldff1uw_s64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u8))) svuint8_t svldnf1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u32))) svuint32_t svldnf1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u64))) svuint64_t svldnf1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u16))) svuint16_t svldnf1_u16(svbool_t, uint16_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s8))) svint8_t svldnf1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f64))) svfloat64_t svldnf1_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f32))) svfloat32_t svldnf1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f16))) svfloat16_t svldnf1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s32))) svint32_t svldnf1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s64))) svint64_t svldnf1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s16))) svint16_t svldnf1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u8))) svuint8_t svldnf1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u32))) svuint32_t svldnf1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u64))) svuint64_t svldnf1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u16))) svuint16_t svldnf1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s8))) svint8_t svldnf1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f64))) svfloat64_t svldnf1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f32))) svfloat32_t svldnf1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f16))) svfloat16_t 
svldnf1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s32))) svint32_t svldnf1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s64))) svint64_t svldnf1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s16))) svint16_t svldnf1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u32))) svuint32_t svldnf1sb_vnum_u32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u64))) svuint64_t svldnf1sb_vnum_u64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u16))) svuint16_t svldnf1sb_vnum_u16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_s32))) svint32_t svldnf1sb_vnum_s32(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_s64))) svint64_t svldnf1sb_vnum_s64(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_s16))) svint16_t svldnf1sb_vnum_s16(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_u32))) svuint32_t svldnf1sb_u32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_u64))) svuint64_t svldnf1sb_u64(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_u16))) svuint16_t svldnf1sb_u16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_s32))) svint32_t svldnf1sb_s32(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_s64))) svint64_t svldnf1sb_s64(svbool_t, 
int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_s16))) svint16_t svldnf1sb_s16(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_u32))) svuint32_t svldnf1sh_vnum_u32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_u64))) svuint64_t svldnf1sh_vnum_u64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_s32))) svint32_t svldnf1sh_vnum_s32(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_vnum_s64))) svint64_t svldnf1sh_vnum_s64(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_u32))) svuint32_t svldnf1sh_u32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_u64))) svuint64_t svldnf1sh_u64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_s32))) svint32_t svldnf1sh_s32(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sh_s64))) svint64_t svldnf1sh_s64(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_vnum_u64))) svuint64_t svldnf1sw_vnum_u64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_vnum_s64))) svint64_t svldnf1sw_vnum_s64(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_u64))) svuint64_t svldnf1sw_u64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sw_s64))) svint64_t svldnf1sw_s64(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_u32))) svuint32_t svldnf1ub_vnum_u32(svbool_t, uint8_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_u64))) svuint64_t svldnf1ub_vnum_u64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_u16))) svuint16_t svldnf1ub_vnum_u16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_s32))) svint32_t svldnf1ub_vnum_s32(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_s64))) svint64_t svldnf1ub_vnum_s64(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_vnum_s16))) svint16_t svldnf1ub_vnum_s16(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_u32))) svuint32_t svldnf1ub_u32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_u64))) svuint64_t svldnf1ub_u64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_u16))) svuint16_t svldnf1ub_u16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_s32))) svint32_t svldnf1ub_s32(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_s64))) svint64_t svldnf1ub_s64(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1ub_s16))) svint16_t svldnf1ub_s16(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_u32))) svuint32_t svldnf1uh_vnum_u32(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_u64))) svuint64_t svldnf1uh_vnum_u64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_s32))) svint32_t svldnf1uh_vnum_s32(svbool_t, uint16_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_vnum_s64))) svint64_t svldnf1uh_vnum_s64(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_u32))) svuint32_t svldnf1uh_u32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_u64))) svuint64_t svldnf1uh_u64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_s32))) svint32_t svldnf1uh_s32(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uh_s64))) svint64_t svldnf1uh_s64(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_vnum_u64))) svuint64_t svldnf1uw_vnum_u64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_vnum_s64))) svint64_t svldnf1uw_vnum_s64(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_u64))) svuint64_t svldnf1uw_u64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1uw_s64))) svint64_t svldnf1uw_s64(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8))) svuint8_t svldnt1_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32))) svuint32_t svldnt1_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64))) svuint64_t svldnt1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16))) svuint16_t svldnt1_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8))) svint8_t svldnt1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64))) svfloat64_t svldnt1_f64(svbool_t, float64_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32))) svfloat32_t svldnt1_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16))) svfloat16_t svldnt1_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32))) svint32_t svldnt1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64))) svint64_t svldnt1_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16))) svint16_t svldnt1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8))) svuint8_t svldnt1_vnum_u8(svbool_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32))) svuint32_t svldnt1_vnum_u32(svbool_t, uint32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64))) svuint64_t svldnt1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16))) svuint16_t svldnt1_vnum_u16(svbool_t, uint16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8))) svint8_t svldnt1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64))) svfloat64_t svldnt1_vnum_f64(svbool_t, float64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32))) svfloat32_t svldnt1_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16))) svfloat16_t svldnt1_vnum_f16(svbool_t, float16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32))) svint32_t svldnt1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64))) svint64_t svldnt1_vnum_s64(svbool_t, int64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16))) svint16_t svldnt1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u8))) uint64_t svlen_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u32))) uint64_t svlen_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u64))) uint64_t svlen_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u16))) uint64_t svlen_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s8))) uint64_t svlen_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f64))) uint64_t svlen_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f32))) uint64_t svlen_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f16))) uint64_t svlen_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s32))) uint64_t svlen_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s64))) uint64_t svlen_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s16))) uint64_t svlen_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_m))) svuint8_t svlsl_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_m))) svuint32_t svlsl_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_m))) svuint64_t svlsl_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_m))) svuint16_t svlsl_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_m))) svint8_t svlsl_n_s8_m(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_m))) svint32_t svlsl_n_s32_m(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_m))) svint64_t svlsl_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_m))) svint16_t svlsl_n_s16_m(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_x))) svuint8_t svlsl_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_x))) svuint32_t svlsl_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_x))) svuint64_t svlsl_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_x))) svuint16_t svlsl_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_x))) svint8_t svlsl_n_s8_x(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_x))) svint32_t svlsl_n_s32_x(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_x))) svint64_t svlsl_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_x))) svint16_t svlsl_n_s16_x(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_z))) svuint8_t svlsl_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_z))) svuint32_t svlsl_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_z))) svuint64_t svlsl_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_z))) svuint16_t svlsl_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_z))) svint8_t svlsl_n_s8_z(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_z))) svint32_t svlsl_n_s32_z(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_z))) svint64_t svlsl_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_z))) svint16_t svlsl_n_s16_z(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_m))) svuint8_t svlsl_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_m))) svuint32_t svlsl_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_m))) svuint64_t svlsl_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_m))) svuint16_t svlsl_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_m))) svint8_t svlsl_s8_m(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_m))) svint32_t svlsl_s32_m(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_m))) svint64_t svlsl_s64_m(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_m))) svint16_t svlsl_s16_m(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_x))) svuint8_t svlsl_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_x))) svuint32_t svlsl_u32_x(svbool_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_x))) svuint64_t svlsl_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_x))) svuint16_t svlsl_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_x))) svint8_t svlsl_s8_x(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_x))) svint32_t svlsl_s32_x(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_x))) svint64_t svlsl_s64_x(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_x))) svint16_t svlsl_s16_x(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_z))) svuint8_t svlsl_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_z))) svuint32_t svlsl_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_z))) svuint64_t svlsl_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_z))) svuint16_t svlsl_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_z))) svint8_t svlsl_s8_z(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_z))) svint32_t svlsl_s32_z(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_z))) svint64_t svlsl_s64_z(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_z))) svint16_t svlsl_s16_z(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_m))) svuint8_t svlsl_wide_n_u8_m(svbool_t, svuint8_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_m))) svuint32_t svlsl_wide_n_u32_m(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_m))) svuint16_t svlsl_wide_n_u16_m(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_m))) svint8_t svlsl_wide_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_m))) svint32_t svlsl_wide_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_m))) svint16_t svlsl_wide_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_x))) svuint8_t svlsl_wide_n_u8_x(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_x))) svuint32_t svlsl_wide_n_u32_x(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_x))) svuint16_t svlsl_wide_n_u16_x(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_x))) svint8_t svlsl_wide_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_x))) svint32_t svlsl_wide_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_x))) svint16_t svlsl_wide_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_z))) svuint8_t svlsl_wide_n_u8_z(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_z))) svuint32_t svlsl_wide_n_u32_z(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_z))) svuint16_t svlsl_wide_n_u16_z(svbool_t, svuint16_t, uint64_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_z))) svint8_t svlsl_wide_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_z))) svint32_t svlsl_wide_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_z))) svint16_t svlsl_wide_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_m))) svuint8_t svlsl_wide_u8_m(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_m))) svuint32_t svlsl_wide_u32_m(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_m))) svuint16_t svlsl_wide_u16_m(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_m))) svint8_t svlsl_wide_s8_m(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_m))) svint32_t svlsl_wide_s32_m(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_m))) svint16_t svlsl_wide_s16_m(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_x))) svuint8_t svlsl_wide_u8_x(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_x))) svuint32_t svlsl_wide_u32_x(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_x))) svuint16_t svlsl_wide_u16_x(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_x))) svint8_t svlsl_wide_s8_x(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_x))) svint32_t svlsl_wide_s32_x(svbool_t, svint32_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_x))) svint16_t svlsl_wide_s16_x(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_z))) svuint8_t svlsl_wide_u8_z(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_z))) svuint32_t svlsl_wide_u32_z(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_z))) svuint16_t svlsl_wide_u16_z(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_z))) svint8_t svlsl_wide_s8_z(svbool_t, svint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_z))) svint32_t svlsl_wide_s32_z(svbool_t, svint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_z))) svint16_t svlsl_wide_s16_z(svbool_t, svint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_m))) svuint8_t svlsr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_m))) svuint32_t svlsr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_m))) svuint64_t svlsr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_m))) svuint16_t svlsr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_x))) svuint8_t svlsr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_x))) svuint32_t svlsr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_x))) svuint64_t svlsr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_x))) 
svuint16_t svlsr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_z))) svuint8_t svlsr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_z))) svuint32_t svlsr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_z))) svuint64_t svlsr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_z))) svuint16_t svlsr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_m))) svuint8_t svlsr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_m))) svuint32_t svlsr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_m))) svuint64_t svlsr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_m))) svuint16_t svlsr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_x))) svuint8_t svlsr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_x))) svuint32_t svlsr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_x))) svuint64_t svlsr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_x))) svuint16_t svlsr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_z))) svuint8_t svlsr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_z))) svuint32_t svlsr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_z))) svuint64_t 
svlsr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_z))) svuint16_t svlsr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_m))) svuint8_t svlsr_wide_n_u8_m(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_m))) svuint32_t svlsr_wide_n_u32_m(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_m))) svuint16_t svlsr_wide_n_u16_m(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_x))) svuint8_t svlsr_wide_n_u8_x(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_x))) svuint32_t svlsr_wide_n_u32_x(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_x))) svuint16_t svlsr_wide_n_u16_x(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_z))) svuint8_t svlsr_wide_n_u8_z(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_z))) svuint32_t svlsr_wide_n_u32_z(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_z))) svuint16_t svlsr_wide_n_u16_z(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_m))) svuint8_t svlsr_wide_u8_m(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_m))) svuint32_t svlsr_wide_u32_m(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_m))) svuint16_t svlsr_wide_u16_m(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_x))) svuint8_t 
svlsr_wide_u8_x(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_x))) svuint32_t svlsr_wide_u32_x(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_x))) svuint16_t svlsr_wide_u16_x(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_z))) svuint8_t svlsr_wide_u8_z(svbool_t, svuint8_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_z))) svuint32_t svlsr_wide_u32_z(svbool_t, svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_z))) svuint16_t svlsr_wide_u16_z(svbool_t, svuint16_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_m))) svfloat64_t svmad_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_m))) svfloat32_t svmad_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_m))) svfloat16_t svmad_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_x))) svfloat64_t svmad_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_x))) svfloat32_t svmad_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_x))) svfloat16_t svmad_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_z))) svfloat64_t svmad_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_z))) svfloat32_t svmad_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_z))) svfloat16_t svmad_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_m))) svuint8_t svmad_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_m))) svuint32_t svmad_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_m))) svuint64_t svmad_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_m))) svuint16_t svmad_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_m))) svint8_t svmad_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_m))) svint32_t svmad_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_m))) svint64_t svmad_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_m))) svint16_t svmad_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_x))) svuint8_t svmad_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_x))) svuint32_t svmad_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_x))) svuint64_t svmad_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_x))) svuint16_t svmad_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_x))) svint8_t svmad_n_s8_x(svbool_t, svint8_t, 
svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_x))) svint32_t svmad_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_x))) svint64_t svmad_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_x))) svint16_t svmad_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_z))) svuint8_t svmad_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_z))) svuint32_t svmad_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_z))) svuint64_t svmad_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_z))) svuint16_t svmad_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_z))) svint8_t svmad_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_z))) svint32_t svmad_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_z))) svint64_t svmad_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_z))) svint16_t svmad_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_m))) svfloat64_t svmad_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_m))) svfloat32_t svmad_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_m))) svfloat16_t 
svmad_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_x))) svfloat64_t svmad_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_x))) svfloat32_t svmad_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_x))) svfloat16_t svmad_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_z))) svfloat64_t svmad_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_z))) svfloat32_t svmad_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_z))) svfloat16_t svmad_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_m))) svuint8_t svmad_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_m))) svuint32_t svmad_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_m))) svuint64_t svmad_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_m))) svuint16_t svmad_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_m))) svint8_t svmad_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_m))) svint32_t svmad_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_m))) svint64_t svmad_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_m))) svint16_t svmad_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_x))) svuint8_t svmad_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_x))) svuint32_t svmad_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_x))) svuint64_t svmad_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_x))) svuint16_t svmad_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_x))) svint8_t svmad_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_x))) svint32_t svmad_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_x))) svint64_t svmad_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_x))) svint16_t svmad_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_z))) svuint8_t svmad_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_z))) svuint32_t svmad_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_z))) svuint64_t svmad_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_z))) svuint16_t svmad_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_z))) svint8_t svmad_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_z))) svint32_t svmad_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_z))) svint64_t svmad_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_z))) svint16_t svmad_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_m))) svfloat64_t svmax_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_m))) svfloat32_t svmax_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_m))) svfloat16_t svmax_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_x))) svfloat64_t svmax_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_x))) svfloat32_t svmax_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_x))) svfloat16_t svmax_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_z))) svfloat64_t svmax_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_z))) svfloat32_t svmax_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_z))) svfloat16_t svmax_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_m))) svint8_t svmax_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_m))) svint32_t svmax_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_m))) 
svint64_t svmax_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_m))) svint16_t svmax_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_x))) svint8_t svmax_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_x))) svint32_t svmax_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_x))) svint64_t svmax_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_x))) svint16_t svmax_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_z))) svint8_t svmax_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_z))) svint32_t svmax_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_z))) svint64_t svmax_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_z))) svint16_t svmax_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_m))) svuint8_t svmax_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_m))) svuint32_t svmax_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_m))) svuint64_t svmax_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_m))) svuint16_t svmax_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_x))) svuint8_t svmax_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_x))) svuint32_t 
svmax_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_x))) svuint64_t svmax_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_x))) svuint16_t svmax_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_z))) svuint8_t svmax_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_z))) svuint32_t svmax_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_z))) svuint64_t svmax_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_z))) svuint16_t svmax_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_m))) svfloat64_t svmax_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_m))) svfloat32_t svmax_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_m))) svfloat16_t svmax_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x))) svfloat64_t svmax_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x))) svfloat32_t svmax_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x))) svfloat16_t svmax_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_z))) svfloat64_t svmax_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_z))) svfloat32_t svmax_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_z))) svfloat16_t svmax_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_m))) svint8_t svmax_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_m))) svint32_t svmax_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_m))) svint64_t svmax_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_m))) svint16_t svmax_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x))) svint8_t svmax_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x))) svint32_t svmax_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x))) svint64_t svmax_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x))) svint16_t svmax_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_z))) svint8_t svmax_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_z))) svint32_t svmax_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_z))) svint64_t svmax_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_z))) svint16_t svmax_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_m))) svuint8_t svmax_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_m))) svuint32_t svmax_u32_m(svbool_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_m))) svuint64_t svmax_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_m))) svuint16_t svmax_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x))) svuint8_t svmax_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x))) svuint32_t svmax_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x))) svuint64_t svmax_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x))) svuint16_t svmax_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_z))) svuint8_t svmax_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_z))) svuint32_t svmax_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_z))) svuint64_t svmax_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_z))) svuint16_t svmax_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_m))) svfloat64_t svmaxnm_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_m))) svfloat32_t svmaxnm_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_m))) svfloat16_t svmaxnm_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_x))) svfloat64_t svmaxnm_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_x))) svfloat32_t svmaxnm_n_f32_x(svbool_t, 
svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_x))) svfloat16_t svmaxnm_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_z))) svfloat64_t svmaxnm_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_z))) svfloat32_t svmaxnm_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_z))) svfloat16_t svmaxnm_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_m))) svfloat64_t svmaxnm_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_m))) svfloat32_t svmaxnm_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_m))) svfloat16_t svmaxnm_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x))) svfloat64_t svmaxnm_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x))) svfloat32_t svmaxnm_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x))) svfloat16_t svmaxnm_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_z))) svfloat64_t svmaxnm_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_z))) svfloat32_t svmaxnm_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_z))) svfloat16_t svmaxnm_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f64))) float64_t svmaxnmv_f64(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f32))) float32_t svmaxnmv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f16))) float16_t svmaxnmv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f64))) float64_t svmaxv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f32))) float32_t svmaxv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f16))) float16_t svmaxv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s8))) int8_t svmaxv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s32))) int32_t svmaxv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s64))) int64_t svmaxv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s16))) int16_t svmaxv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u8))) uint8_t svmaxv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u32))) uint32_t svmaxv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u64))) uint64_t svmaxv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u16))) uint16_t svmaxv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_m))) svfloat64_t svmin_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_m))) svfloat32_t svmin_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_m))) svfloat16_t svmin_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_x))) svfloat64_t svmin_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_x))) svfloat32_t svmin_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_x))) svfloat16_t svmin_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_z))) svfloat64_t svmin_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_z))) svfloat32_t svmin_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_z))) svfloat16_t svmin_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_m))) svint8_t svmin_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_m))) svint32_t svmin_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_m))) svint64_t svmin_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_m))) svint16_t svmin_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_x))) svint8_t svmin_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_x))) svint32_t svmin_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_x))) svint64_t svmin_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_x))) svint16_t svmin_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_z))) svint8_t svmin_n_s8_z(svbool_t, svint8_t, int8_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_z))) svint32_t svmin_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_z))) svint64_t svmin_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_z))) svint16_t svmin_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_m))) svuint8_t svmin_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_m))) svuint32_t svmin_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_m))) svuint64_t svmin_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_m))) svuint16_t svmin_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_x))) svuint8_t svmin_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_x))) svuint32_t svmin_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_x))) svuint64_t svmin_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_x))) svuint16_t svmin_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_z))) svuint8_t svmin_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_z))) svuint32_t svmin_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_z))) svuint64_t svmin_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_z))) svuint16_t svmin_n_u16_z(svbool_t, svuint16_t, 
uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_m))) svfloat64_t svmin_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_m))) svfloat32_t svmin_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_m))) svfloat16_t svmin_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x))) svfloat64_t svmin_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x))) svfloat32_t svmin_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x))) svfloat16_t svmin_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_z))) svfloat64_t svmin_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_z))) svfloat32_t svmin_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_z))) svfloat16_t svmin_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_m))) svint8_t svmin_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_m))) svint32_t svmin_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_m))) svint64_t svmin_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_m))) svint16_t svmin_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x))) svint8_t svmin_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x))) svint32_t svmin_s32_x(svbool_t, svint32_t, 
svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x))) svint64_t svmin_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x))) svint16_t svmin_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_z))) svint8_t svmin_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_z))) svint32_t svmin_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_z))) svint64_t svmin_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_z))) svint16_t svmin_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_m))) svuint8_t svmin_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_m))) svuint32_t svmin_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_m))) svuint64_t svmin_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_m))) svuint16_t svmin_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x))) svuint8_t svmin_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x))) svuint32_t svmin_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x))) svuint64_t svmin_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x))) svuint16_t svmin_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_z))) svuint8_t svmin_u8_z(svbool_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_z))) svuint32_t svmin_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_z))) svuint64_t svmin_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_z))) svuint16_t svmin_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_m))) svfloat64_t svminnm_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_m))) svfloat32_t svminnm_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_m))) svfloat16_t svminnm_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_x))) svfloat64_t svminnm_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_x))) svfloat32_t svminnm_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_x))) svfloat16_t svminnm_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_z))) svfloat64_t svminnm_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_z))) svfloat32_t svminnm_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_z))) svfloat16_t svminnm_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_m))) svfloat64_t svminnm_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_m))) svfloat32_t svminnm_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_m))) svfloat16_t svminnm_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x))) svfloat64_t svminnm_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x))) svfloat32_t svminnm_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x))) svfloat16_t svminnm_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_z))) svfloat64_t svminnm_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_z))) svfloat32_t svminnm_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_z))) svfloat16_t svminnm_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f64))) float64_t svminnmv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f32))) float32_t svminnmv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f16))) float16_t svminnmv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f64))) float64_t svminv_f64(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f32))) float32_t svminv_f32(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f16))) float16_t svminv_f16(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s8))) int8_t svminv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s32))) int32_t svminv_s32(svbool_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s64))) int64_t svminv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s16))) int16_t svminv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u8))) uint8_t svminv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u32))) uint32_t svminv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u64))) uint64_t svminv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u16))) uint16_t svminv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_m))) svfloat64_t svmla_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_m))) svfloat32_t svmla_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_m))) svfloat16_t svmla_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_x))) svfloat64_t svmla_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_x))) svfloat32_t svmla_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_x))) svfloat16_t svmla_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_z))) svfloat64_t svmla_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_z))) svfloat32_t svmla_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_z))) svfloat16_t 
svmla_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_m))) svuint8_t svmla_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_m))) svuint32_t svmla_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_m))) svuint64_t svmla_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_m))) svuint16_t svmla_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_m))) svint8_t svmla_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_m))) svint32_t svmla_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_m))) svint64_t svmla_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_m))) svint16_t svmla_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_x))) svuint8_t svmla_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_x))) svuint32_t svmla_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_x))) svuint64_t svmla_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_x))) svuint16_t svmla_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_x))) svint8_t svmla_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_x))) svint32_t svmla_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_x))) svint64_t svmla_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_x))) svint16_t svmla_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_z))) svuint8_t svmla_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_z))) svuint32_t svmla_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_z))) svuint64_t svmla_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_z))) svuint16_t svmla_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_z))) svint8_t svmla_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_z))) svint32_t svmla_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_z))) svint64_t svmla_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_z))) svint16_t svmla_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_m))) svfloat64_t svmla_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_m))) svfloat32_t svmla_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_m))) svfloat16_t svmla_f16_m(svbool_t, svfloat16_t, 
svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_x))) svfloat64_t svmla_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_x))) svfloat32_t svmla_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_x))) svfloat16_t svmla_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_z))) svfloat64_t svmla_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_z))) svfloat32_t svmla_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_z))) svfloat16_t svmla_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_m))) svuint8_t svmla_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_m))) svuint32_t svmla_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_m))) svuint64_t svmla_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_m))) svuint16_t svmla_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_m))) svint8_t svmla_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_m))) svint32_t svmla_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_m))) svint64_t svmla_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_m))) svint16_t 
svmla_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_x))) svuint8_t svmla_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_x))) svuint32_t svmla_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_x))) svuint64_t svmla_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_x))) svuint16_t svmla_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_x))) svint8_t svmla_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_x))) svint32_t svmla_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_x))) svint64_t svmla_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_x))) svint16_t svmla_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_z))) svuint8_t svmla_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_z))) svuint32_t svmla_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_z))) svuint64_t svmla_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_z))) svuint16_t svmla_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_z))) svint8_t svmla_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_z))) svint32_t 
svmla_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_z))) svint64_t svmla_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_z))) svint16_t svmla_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f64))) svfloat64_t svmla_lane_f64(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f32))) svfloat32_t svmla_lane_f32(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f16))) svfloat16_t svmla_lane_f16(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_m))) svfloat64_t svmls_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_m))) svfloat32_t svmls_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_m))) svfloat16_t svmls_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_x))) svfloat64_t svmls_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_x))) svfloat32_t svmls_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_x))) svfloat16_t svmls_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_z))) svfloat64_t svmls_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_z))) svfloat32_t svmls_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, 
float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_z))) svfloat16_t svmls_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_m))) svuint8_t svmls_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_m))) svuint32_t svmls_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_m))) svuint64_t svmls_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_m))) svuint16_t svmls_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_m))) svint8_t svmls_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_m))) svint32_t svmls_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_m))) svint64_t svmls_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_m))) svint16_t svmls_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_x))) svuint8_t svmls_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_x))) svuint32_t svmls_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_x))) svuint64_t svmls_n_u64_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_x))) svuint16_t svmls_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_x))) svint8_t 
svmls_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_x))) svint32_t svmls_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_x))) svint64_t svmls_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_x))) svint16_t svmls_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_z))) svuint8_t svmls_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_z))) svuint32_t svmls_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_z))) svuint64_t svmls_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_z))) svuint16_t svmls_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_z))) svint8_t svmls_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_z))) svint32_t svmls_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_z))) svint64_t svmls_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_z))) svint16_t svmls_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_m))) svfloat64_t svmls_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_m))) svfloat32_t svmls_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_m))) svfloat16_t svmls_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_x))) svfloat64_t svmls_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_x))) svfloat32_t svmls_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_x))) svfloat16_t svmls_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_z))) svfloat64_t svmls_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_z))) svfloat32_t svmls_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_z))) svfloat16_t svmls_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_m))) svuint8_t svmls_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_m))) svuint32_t svmls_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_m))) svuint64_t svmls_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_m))) svuint16_t svmls_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_m))) svint8_t svmls_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_m))) svint32_t svmls_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_m))) svint64_t svmls_s64_m(svbool_t, 
svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_m))) svint16_t svmls_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_x))) svuint8_t svmls_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_x))) svuint32_t svmls_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_x))) svuint64_t svmls_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_x))) svuint16_t svmls_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_x))) svint8_t svmls_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_x))) svint32_t svmls_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_x))) svint64_t svmls_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_x))) svint16_t svmls_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_z))) svuint8_t svmls_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_z))) svuint32_t svmls_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_z))) svuint64_t svmls_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_z))) svuint16_t svmls_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_z))) svint8_t svmls_s8_z(svbool_t, svint8_t, 
svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_z))) svint32_t svmls_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_z))) svint64_t svmls_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_z))) svint16_t svmls_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f64))) svfloat64_t svmls_lane_f64(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f32))) svfloat32_t svmls_lane_f32(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f16))) svfloat16_t svmls_lane_f16(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmov_b_z))) svbool_t svmov_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_m))) svfloat64_t svmsb_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_m))) svfloat32_t svmsb_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_m))) svfloat16_t svmsb_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_x))) svfloat64_t svmsb_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_x))) svfloat32_t svmsb_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_x))) svfloat16_t svmsb_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_z))) svfloat64_t svmsb_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_z))) svfloat32_t svmsb_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_z))) svfloat16_t svmsb_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_m))) svuint8_t svmsb_n_u8_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_m))) svuint32_t svmsb_n_u32_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_m))) svuint64_t svmsb_n_u64_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_m))) svuint16_t svmsb_n_u16_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_m))) svint8_t svmsb_n_s8_m(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_m))) svint32_t svmsb_n_s32_m(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_m))) svint64_t svmsb_n_s64_m(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_m))) svint16_t svmsb_n_s16_m(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_x))) svuint8_t svmsb_n_u8_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_x))) svuint32_t svmsb_n_u32_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_x))) svuint64_t svmsb_n_u64_x(svbool_t, 
svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_x))) svuint16_t svmsb_n_u16_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_x))) svint8_t svmsb_n_s8_x(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_x))) svint32_t svmsb_n_s32_x(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_x))) svint64_t svmsb_n_s64_x(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_x))) svint16_t svmsb_n_s16_x(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_z))) svuint8_t svmsb_n_u8_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_z))) svuint32_t svmsb_n_u32_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_z))) svuint64_t svmsb_n_u64_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_z))) svuint16_t svmsb_n_u16_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_z))) svint8_t svmsb_n_s8_z(svbool_t, svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_z))) svint32_t svmsb_n_s32_z(svbool_t, svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_z))) svint64_t svmsb_n_s64_z(svbool_t, svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_z))) svint16_t svmsb_n_s16_z(svbool_t, svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_m))) svfloat64_t 
svmsb_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_m))) svfloat32_t svmsb_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_m))) svfloat16_t svmsb_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_x))) svfloat64_t svmsb_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_x))) svfloat32_t svmsb_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_x))) svfloat16_t svmsb_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_z))) svfloat64_t svmsb_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_z))) svfloat32_t svmsb_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_z))) svfloat16_t svmsb_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_m))) svuint8_t svmsb_u8_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_m))) svuint32_t svmsb_u32_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_m))) svuint64_t svmsb_u64_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_m))) svuint16_t svmsb_u16_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_m))) svint8_t svmsb_s8_m(svbool_t, svint8_t, svint8_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_m))) svint32_t svmsb_s32_m(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_m))) svint64_t svmsb_s64_m(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_m))) svint16_t svmsb_s16_m(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_x))) svuint8_t svmsb_u8_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_x))) svuint32_t svmsb_u32_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_x))) svuint64_t svmsb_u64_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_x))) svuint16_t svmsb_u16_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_x))) svint8_t svmsb_s8_x(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_x))) svint32_t svmsb_s32_x(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_x))) svint64_t svmsb_s64_x(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_x))) svint16_t svmsb_s16_x(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_z))) svuint8_t svmsb_u8_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_z))) svuint32_t svmsb_u32_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_z))) svuint64_t svmsb_u64_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_z))) svuint16_t svmsb_u16_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_z))) svint8_t svmsb_s8_z(svbool_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_z))) svint32_t svmsb_s32_z(svbool_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_z))) svint64_t svmsb_s64_z(svbool_t, svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_z))) svint16_t svmsb_s16_z(svbool_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_m))) svfloat64_t svmul_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_m))) svfloat32_t svmul_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_m))) svfloat16_t svmul_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_x))) svfloat64_t svmul_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_x))) svfloat32_t svmul_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_x))) svfloat16_t svmul_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_z))) svfloat64_t svmul_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_z))) svfloat32_t svmul_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_z))) svfloat16_t svmul_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_m))) svuint8_t svmul_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_m))) svuint32_t svmul_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_m))) svuint64_t svmul_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_m))) svuint16_t svmul_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_m))) svint8_t svmul_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_m))) svint32_t svmul_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_m))) svint64_t svmul_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_m))) svint16_t svmul_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_x))) svuint8_t svmul_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_x))) svuint32_t svmul_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_x))) svuint64_t svmul_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_x))) svuint16_t svmul_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_x))) svint8_t svmul_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_x))) svint32_t svmul_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_x))) svint64_t svmul_n_s64_x(svbool_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_x))) svint16_t svmul_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_z))) svuint8_t svmul_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_z))) svuint32_t svmul_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_z))) svuint64_t svmul_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_z))) svuint16_t svmul_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_z))) svint8_t svmul_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_z))) svint32_t svmul_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_z))) svint64_t svmul_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_z))) svint16_t svmul_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_m))) svfloat64_t svmul_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_m))) svfloat32_t svmul_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_m))) svfloat16_t svmul_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x))) svfloat64_t svmul_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x))) svfloat32_t svmul_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x))) svfloat16_t svmul_f16_x(svbool_t, svfloat16_t, 
svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_z))) svfloat64_t svmul_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_z))) svfloat32_t svmul_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_z))) svfloat16_t svmul_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_m))) svuint8_t svmul_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_m))) svuint32_t svmul_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_m))) svuint64_t svmul_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_m))) svuint16_t svmul_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_m))) svint8_t svmul_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_m))) svint32_t svmul_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_m))) svint64_t svmul_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_m))) svint16_t svmul_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_x))) svuint8_t svmul_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_x))) svuint32_t svmul_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_x))) svuint64_t svmul_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_x))) svuint16_t svmul_u16_x(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_x))) svint8_t svmul_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_x))) svint32_t svmul_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_x))) svint64_t svmul_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_x))) svint16_t svmul_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_z))) svuint8_t svmul_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_z))) svuint32_t svmul_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_z))) svuint64_t svmul_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_z))) svuint16_t svmul_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_z))) svint8_t svmul_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_z))) svint32_t svmul_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_z))) svint64_t svmul_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_z))) svint16_t svmul_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f64))) svfloat64_t svmul_lane_f64(svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f32))) svfloat32_t svmul_lane_f32(svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f16))) svfloat16_t svmul_lane_f16(svfloat16_t, svfloat16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_m))) svint8_t svmulh_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_m))) svint32_t svmulh_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_m))) svint64_t svmulh_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_m))) svint16_t svmulh_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_x))) svint8_t svmulh_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_x))) svint32_t svmulh_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_x))) svint64_t svmulh_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_x))) svint16_t svmulh_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_z))) svint8_t svmulh_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_z))) svint32_t svmulh_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_z))) svint64_t svmulh_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_z))) svint16_t svmulh_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_m))) svuint8_t svmulh_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_m))) svuint32_t svmulh_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_m))) svuint64_t svmulh_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_m))) svuint16_t svmulh_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_x))) svuint8_t svmulh_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_x))) svuint32_t svmulh_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_x))) svuint64_t svmulh_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_x))) svuint16_t svmulh_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_z))) svuint8_t svmulh_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_z))) svuint32_t svmulh_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_z))) svuint64_t svmulh_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_z))) svuint16_t svmulh_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_m))) svint8_t svmulh_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_m))) svint32_t svmulh_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_m))) svint64_t svmulh_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_m))) svint16_t svmulh_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_x))) svint8_t svmulh_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_x))) svint32_t svmulh_s32_x(svbool_t, svint32_t, svint32_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_x))) svint64_t svmulh_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_x))) svint16_t svmulh_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_z))) svint8_t svmulh_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_z))) svint32_t svmulh_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_z))) svint64_t svmulh_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_z))) svint16_t svmulh_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_m))) svuint8_t svmulh_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_m))) svuint32_t svmulh_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_m))) svuint64_t svmulh_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_m))) svuint16_t svmulh_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_x))) svuint8_t svmulh_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_x))) svuint32_t svmulh_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_x))) svuint64_t svmulh_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_x))) svuint16_t svmulh_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_z))) svuint8_t svmulh_u8_z(svbool_t, svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_z))) svuint32_t svmulh_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_z))) svuint64_t svmulh_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_z))) svuint16_t svmulh_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_m))) svfloat64_t svmulx_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_m))) svfloat32_t svmulx_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_m))) svfloat16_t svmulx_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_x))) svfloat64_t svmulx_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_x))) svfloat32_t svmulx_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_x))) svfloat16_t svmulx_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_z))) svfloat64_t svmulx_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_z))) svfloat32_t svmulx_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_z))) svfloat16_t svmulx_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_m))) svfloat64_t svmulx_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_m))) svfloat32_t svmulx_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_m))) svfloat16_t svmulx_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_x))) svfloat64_t svmulx_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_x))) svfloat32_t svmulx_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_x))) svfloat16_t svmulx_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_z))) svfloat64_t svmulx_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_z))) svfloat32_t svmulx_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_z))) svfloat16_t svmulx_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnand_b_z))) svbool_t svnand_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_m))) svfloat64_t svneg_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_m))) svfloat32_t svneg_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_m))) svfloat16_t svneg_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_x))) svfloat64_t svneg_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_x))) svfloat32_t svneg_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_x))) svfloat16_t svneg_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_z))) svfloat64_t svneg_f64_z(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_z))) svfloat32_t svneg_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_z))) svfloat16_t svneg_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_m))) svint8_t svneg_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_m))) svint32_t svneg_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_m))) svint64_t svneg_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_m))) svint16_t svneg_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_x))) svint8_t svneg_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_x))) svint32_t svneg_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_x))) svint64_t svneg_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_x))) svint16_t svneg_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_z))) svint8_t svneg_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_z))) svint32_t svneg_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_z))) svint64_t svneg_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_z))) svint16_t svneg_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_m))) svfloat64_t svnmad_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_m))) svfloat32_t svnmad_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, 
float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_m))) svfloat16_t svnmad_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_x))) svfloat64_t svnmad_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_x))) svfloat32_t svnmad_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_x))) svfloat16_t svnmad_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_z))) svfloat64_t svnmad_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_z))) svfloat32_t svnmad_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_z))) svfloat16_t svnmad_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_m))) svfloat64_t svnmad_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_m))) svfloat32_t svnmad_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_m))) svfloat16_t svnmad_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_x))) svfloat64_t svnmad_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_x))) svfloat32_t svnmad_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_x))) svfloat16_t svnmad_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_z))) svfloat64_t svnmad_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_z))) svfloat32_t svnmad_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_z))) svfloat16_t svnmad_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_m))) svfloat64_t svnmla_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_m))) svfloat32_t svnmla_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_m))) svfloat16_t svnmla_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_x))) svfloat64_t svnmla_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_x))) svfloat32_t svnmla_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_x))) svfloat16_t svnmla_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_z))) svfloat64_t svnmla_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_z))) svfloat32_t svnmla_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_z))) svfloat16_t svnmla_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_m))) svfloat64_t svnmla_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_m))) svfloat32_t svnmla_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_m))) svfloat16_t svnmla_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_x))) svfloat64_t svnmla_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_x))) svfloat32_t svnmla_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_x))) svfloat16_t svnmla_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_z))) svfloat64_t svnmla_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_z))) svfloat32_t svnmla_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_z))) svfloat16_t svnmla_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_m))) svfloat64_t svnmls_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_m))) svfloat32_t svnmls_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_m))) svfloat16_t svnmls_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_x))) svfloat64_t svnmls_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_x))) svfloat32_t svnmls_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_x))) svfloat16_t svnmls_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_z))) svfloat64_t svnmls_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_z))) svfloat32_t svnmls_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_z))) svfloat16_t svnmls_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_m))) svfloat64_t svnmls_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_m))) svfloat32_t svnmls_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_m))) svfloat16_t svnmls_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_x))) svfloat64_t svnmls_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_x))) svfloat32_t svnmls_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_x))) svfloat16_t svnmls_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_z))) svfloat64_t svnmls_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_z))) svfloat32_t svnmls_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_z))) svfloat16_t svnmls_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_m))) svfloat64_t svnmsb_n_f64_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_m))) svfloat32_t svnmsb_n_f32_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_m))) svfloat16_t svnmsb_n_f16_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_x))) svfloat64_t svnmsb_n_f64_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_x))) svfloat32_t svnmsb_n_f32_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_x))) svfloat16_t svnmsb_n_f16_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_z))) svfloat64_t svnmsb_n_f64_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_z))) svfloat32_t svnmsb_n_f32_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_z))) svfloat16_t svnmsb_n_f16_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_m))) svfloat64_t svnmsb_f64_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_m))) svfloat32_t svnmsb_f32_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_m))) svfloat16_t svnmsb_f16_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_x))) svfloat64_t svnmsb_f64_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_x))) svfloat32_t svnmsb_f32_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_x))) svfloat16_t svnmsb_f16_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_z))) svfloat64_t svnmsb_f64_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_z))) svfloat32_t svnmsb_f32_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_z))) svfloat16_t svnmsb_f16_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnor_b_z))) svbool_t svnor_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_b_z))) svbool_t svnot_b_z(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_m))) svuint8_t svnot_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_m))) svuint32_t svnot_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_m))) svuint64_t svnot_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_m))) svuint16_t svnot_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_m))) svint8_t svnot_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_m))) svint32_t svnot_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_m))) svint64_t svnot_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_m))) svint16_t 
svnot_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_x))) svuint8_t svnot_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_x))) svuint32_t svnot_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_x))) svuint64_t svnot_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_x))) svuint16_t svnot_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_x))) svint8_t svnot_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_x))) svint32_t svnot_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_x))) svint64_t svnot_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_x))) svint16_t svnot_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_z))) svuint8_t svnot_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_z))) svuint32_t svnot_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_z))) svuint64_t svnot_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_z))) svuint16_t svnot_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_z))) svint8_t svnot_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_z))) svint32_t svnot_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_z))) svint64_t svnot_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_z))) svint16_t svnot_s16_z(svbool_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorn_b_z))) svbool_t svorn_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_b_z))) svbool_t svorr_b_z(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_m))) svuint8_t svorr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_m))) svuint32_t svorr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_m))) svuint64_t svorr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_m))) svuint16_t svorr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_m))) svint8_t svorr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_m))) svint32_t svorr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_m))) svint64_t svorr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_m))) svint16_t svorr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_x))) svuint8_t svorr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_x))) svuint32_t svorr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_x))) svuint64_t svorr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_x))) svuint16_t svorr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_x))) svint8_t svorr_n_s8_x(svbool_t, svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_x))) svint32_t svorr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_x))) svint64_t svorr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_x))) svint16_t svorr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_z))) svuint8_t svorr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_z))) svuint32_t svorr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_z))) svuint64_t svorr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_z))) svuint16_t svorr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_z))) svint8_t svorr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_z))) svint32_t svorr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_z))) svint64_t svorr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_z))) svint16_t svorr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_m))) svuint8_t svorr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_m))) svuint32_t svorr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_m))) svuint64_t svorr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_m))) svuint16_t svorr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_m))) svint8_t svorr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_m))) svint32_t svorr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_m))) svint64_t svorr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_m))) svint16_t svorr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_x))) svuint8_t svorr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_x))) svuint32_t svorr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_x))) svuint64_t svorr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_x))) svuint16_t svorr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_x))) svint8_t svorr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_x))) svint32_t svorr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_x))) svint64_t svorr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_x))) svint16_t svorr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_z))) svuint8_t svorr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_z))) svuint32_t svorr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_z))) svuint64_t svorr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_z))) svuint16_t svorr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_z))) svint8_t svorr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_z))) svint32_t svorr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_z))) svint64_t svorr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_z))) svint16_t svorr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u8))) uint8_t svorv_u8(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u32))) uint32_t svorv_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u64))) uint64_t svorv_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u16))) uint16_t svorv_u16(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s8))) int8_t svorv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s32))) int32_t svorv_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s64))) int64_t svorv_s64(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s16))) int16_t svorv_s16(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_b))) svbool_t svpfalse_b(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfirst_b))) svbool_t svpfirst_b(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b8))) svbool_t svpnext_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b32))) svbool_t svpnext_b32(svbool_t, svbool_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b64))) svbool_t svpnext_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpnext_b16))) svbool_t svpnext_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb))) void svprfb(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base))) void svprfb_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base))) void svprfb_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base_offset))) void svprfb_gather_u32base_offset(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base_offset))) void svprfb_gather_u64base_offset(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s32offset))) void svprfb_gather_s32offset(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32offset))) void svprfb_gather_u32offset(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s64offset))) void svprfb_gather_s64offset(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64offset))) void svprfb_gather_u64offset(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_vnum))) void svprfb_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd))) void svprfd(svbool_t, void const *, enum svprfop); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base))) void svprfd_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base))) void svprfd_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base_index))) void svprfd_gather_u32base_index(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base_index))) void svprfd_gather_u64base_index(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s32index))) void svprfd_gather_s32index(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32index))) void svprfd_gather_u32index(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s64index))) void svprfd_gather_s64index(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64index))) void svprfd_gather_u64index(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_vnum))) void svprfd_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh))) void svprfh(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base))) void svprfh_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base))) void svprfh_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base_index))) void svprfh_gather_u32base_index(svbool_t, svuint32_t, 
int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base_index))) void svprfh_gather_u64base_index(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s32index))) void svprfh_gather_s32index(svbool_t, void const *, svint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32index))) void svprfh_gather_u32index(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s64index))) void svprfh_gather_s64index(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64index))) void svprfh_gather_u64index(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_vnum))) void svprfh_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw))) void svprfw(svbool_t, void const *, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base))) void svprfw_gather_u32base(svbool_t, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base))) void svprfw_gather_u64base(svbool_t, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base_index))) void svprfw_gather_u32base_index(svbool_t, svuint32_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base_index))) void svprfw_gather_u64base_index(svbool_t, svuint64_t, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s32index))) void svprfw_gather_s32index(svbool_t, void const *, svint32_t, enum svprfop); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32index))) void svprfw_gather_u32index(svbool_t, void const *, svuint32_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s64index))) void svprfw_gather_s64index(svbool_t, void const *, svint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64index))) void svprfw_gather_u64index(svbool_t, void const *, svuint64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_vnum))) void svprfw_vnum(svbool_t, void const *, int64_t, enum svprfop); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptest_any))) bool svptest_any(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptest_first))) bool svptest_first(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptest_last))) bool svptest_last(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b8))) svbool_t svptrue_pat_b8(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b32))) svbool_t svptrue_pat_b32(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b64))) svbool_t svptrue_pat_b64(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_pat_b16))) svbool_t svptrue_pat_b16(enum svpattern); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b8))) svbool_t svptrue_b8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b32))) svbool_t svptrue_b32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b64))) svbool_t svptrue_b64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_b16))) svbool_t svptrue_b16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8))) svint8_t svqadd_n_s8(svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32))) svint32_t svqadd_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64))) svint64_t svqadd_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16))) svint16_t svqadd_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8))) svuint8_t svqadd_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32))) svuint32_t svqadd_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64))) svuint64_t svqadd_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16))) svuint16_t svqadd_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8))) svint8_t svqadd_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32))) svint32_t svqadd_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64))) svint64_t svqadd_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16))) svint16_t svqadd_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8))) svuint8_t svqadd_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32))) svuint32_t svqadd_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64))) svuint64_t svqadd_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16))) svuint16_t svqadd_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s32))) int32_t svqdecb_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s64))) int64_t 
svqdecb_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u32))) uint32_t svqdecb_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u64))) uint64_t svqdecb_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s32))) int32_t svqdecb_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s64))) int64_t svqdecb_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u32))) uint32_t svqdecb_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u64))) uint64_t svqdecb_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s32))) int32_t svqdecd_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s64))) int64_t svqdecd_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u32))) uint32_t svqdecd_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u64))) uint64_t svqdecd_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_s64))) svint64_t svqdecd_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_u64))) svuint64_t svqdecd_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s32))) int32_t svqdecd_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s64))) int64_t svqdecd_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u32))) uint32_t svqdecd_pat_n_u32(uint32_t, enum svpattern, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u64))) uint64_t svqdecd_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_s64))) svint64_t svqdecd_pat_s64(svint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_u64))) svuint64_t svqdecd_pat_u64(svuint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s32))) int32_t svqdech_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s64))) int64_t svqdech_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u32))) uint32_t svqdech_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u64))) uint64_t svqdech_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_s16))) svint16_t svqdech_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_u16))) svuint16_t svqdech_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s32))) int32_t svqdech_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s64))) int64_t svqdech_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u32))) uint32_t svqdech_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u64))) uint64_t svqdech_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_s16))) svint16_t svqdech_pat_s16(svint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_u16))) svuint16_t 
svqdech_pat_u16(svuint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b8))) int32_t svqdecp_n_s32_b8(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b32))) int32_t svqdecp_n_s32_b32(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b64))) int32_t svqdecp_n_s32_b64(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b16))) int32_t svqdecp_n_s32_b16(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b8))) int64_t svqdecp_n_s64_b8(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b32))) int64_t svqdecp_n_s64_b32(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b64))) int64_t svqdecp_n_s64_b64(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b16))) int64_t svqdecp_n_s64_b16(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b8))) uint32_t svqdecp_n_u32_b8(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b32))) uint32_t svqdecp_n_u32_b32(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b64))) uint32_t svqdecp_n_u32_b64(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b16))) uint32_t svqdecp_n_u32_b16(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b8))) uint64_t svqdecp_n_u64_b8(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b32))) uint64_t svqdecp_n_u64_b32(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b64))) uint64_t svqdecp_n_u64_b64(uint64_t, svbool_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b16))) uint64_t svqdecp_n_u64_b16(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s32))) svint32_t svqdecp_s32(svint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s64))) svint64_t svqdecp_s64(svint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s16))) svint16_t svqdecp_s16(svint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u32))) svuint32_t svqdecp_u32(svuint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u64))) svuint64_t svqdecp_u64(svuint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u16))) svuint16_t svqdecp_u16(svuint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s32))) int32_t svqdecw_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s64))) int64_t svqdecw_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u32))) uint32_t svqdecw_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u64))) uint64_t svqdecw_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_s32))) svint32_t svqdecw_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_u32))) svuint32_t svqdecw_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s32))) int32_t svqdecw_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s64))) int64_t svqdecw_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u32))) uint32_t svqdecw_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u64))) uint64_t svqdecw_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_s32))) svint32_t svqdecw_pat_s32(svint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_u32))) svuint32_t svqdecw_pat_u32(svuint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s32))) int32_t svqincb_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s64))) int64_t svqincb_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u32))) uint32_t svqincb_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u64))) uint64_t svqincb_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s32))) int32_t svqincb_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s64))) int64_t svqincb_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u32))) uint32_t svqincb_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u64))) uint64_t svqincb_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s32))) int32_t svqincd_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s64))) int64_t svqincd_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u32))) uint32_t svqincd_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u64))) uint64_t svqincd_n_u64(uint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_s64))) svint64_t svqincd_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_u64))) svuint64_t svqincd_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s32))) int32_t svqincd_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s64))) int64_t svqincd_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u32))) uint32_t svqincd_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u64))) uint64_t svqincd_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_s64))) svint64_t svqincd_pat_s64(svint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_u64))) svuint64_t svqincd_pat_u64(svuint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s32))) int32_t svqinch_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s64))) int64_t svqinch_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u32))) uint32_t svqinch_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u64))) uint64_t svqinch_n_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_s16))) svint16_t svqinch_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_u16))) svuint16_t svqinch_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s32))) int32_t svqinch_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s64))) int64_t svqinch_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u32))) uint32_t svqinch_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u64))) uint64_t svqinch_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_s16))) svint16_t svqinch_pat_s16(svint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_u16))) svuint16_t svqinch_pat_u16(svuint16_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b8))) int32_t svqincp_n_s32_b8(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b32))) int32_t svqincp_n_s32_b32(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b64))) int32_t svqincp_n_s32_b64(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b16))) int32_t svqincp_n_s32_b16(int32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b8))) int64_t svqincp_n_s64_b8(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b32))) int64_t svqincp_n_s64_b32(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b64))) int64_t svqincp_n_s64_b64(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b16))) int64_t svqincp_n_s64_b16(int64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b8))) uint32_t svqincp_n_u32_b8(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b32))) uint32_t svqincp_n_u32_b32(uint32_t, svbool_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b64))) uint32_t svqincp_n_u32_b64(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b16))) uint32_t svqincp_n_u32_b16(uint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b8))) uint64_t svqincp_n_u64_b8(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b32))) uint64_t svqincp_n_u64_b32(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b64))) uint64_t svqincp_n_u64_b64(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b16))) uint64_t svqincp_n_u64_b16(uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s32))) svint32_t svqincp_s32(svint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s64))) svint64_t svqincp_s64(svint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s16))) svint16_t svqincp_s16(svint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u32))) svuint32_t svqincp_u32(svuint32_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u64))) svuint64_t svqincp_u64(svuint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u16))) svuint16_t svqincp_u16(svuint16_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s32))) int32_t svqincw_n_s32(int32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s64))) int64_t svqincw_n_s64(int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u32))) uint32_t svqincw_n_u32(uint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u64))) uint64_t svqincw_n_u64(uint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_s32))) svint32_t svqincw_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_u32))) svuint32_t svqincw_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s32))) int32_t svqincw_pat_n_s32(int32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s64))) int64_t svqincw_pat_n_s64(int64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u32))) uint32_t svqincw_pat_n_u32(uint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u64))) uint64_t svqincw_pat_n_u64(uint64_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_s32))) svint32_t svqincw_pat_s32(svint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_u32))) svuint32_t svqincw_pat_u32(svuint32_t, enum svpattern, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8))) svint8_t svqsub_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32))) svint32_t svqsub_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64))) svint64_t svqsub_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16))) svint16_t svqsub_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8))) svuint8_t svqsub_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32))) svuint32_t svqsub_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64))) svuint64_t svqsub_n_u64(svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16))) svuint16_t svqsub_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8))) svint8_t svqsub_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32))) svint32_t svqsub_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64))) svint64_t svqsub_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16))) svint16_t svqsub_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8))) svuint8_t svqsub_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32))) svuint32_t svqsub_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64))) svuint64_t svqsub_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16))) svuint16_t svqsub_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_m))) svuint8_t svrbit_u8_m(svuint8_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_m))) svuint32_t svrbit_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_m))) svuint64_t svrbit_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_m))) svuint16_t svrbit_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_m))) svint8_t svrbit_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_m))) svint32_t svrbit_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_m))) svint64_t svrbit_s64_m(svint64_t, svbool_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_m))) svint16_t svrbit_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_x))) svuint8_t svrbit_u8_x(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_x))) svuint32_t svrbit_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_x))) svuint64_t svrbit_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_x))) svuint16_t svrbit_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_x))) svint8_t svrbit_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_x))) svint32_t svrbit_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_x))) svint64_t svrbit_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_x))) svint16_t svrbit_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_z))) svuint8_t svrbit_u8_z(svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_z))) svuint32_t svrbit_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_z))) svuint64_t svrbit_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_z))) svuint16_t svrbit_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_z))) svint8_t svrbit_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_z))) svint32_t svrbit_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_z))) svint64_t svrbit_s64_z(svbool_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_z))) svint16_t svrbit_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrdffr))) svbool_t svrdffr(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrdffr_z))) svbool_t svrdffr_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f64))) svfloat64_t svrecpe_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f32))) svfloat32_t svrecpe_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f16))) svfloat16_t svrecpe_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f64))) svfloat64_t svrecps_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f32))) svfloat32_t svrecps_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f16))) svfloat16_t svrecps_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_m))) svfloat64_t svrecpx_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_m))) svfloat32_t svrecpx_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_m))) svfloat16_t svrecpx_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_x))) svfloat64_t svrecpx_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_x))) svfloat32_t svrecpx_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_x))) svfloat16_t svrecpx_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_z))) svfloat64_t svrecpx_f64_z(svbool_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_z))) svfloat32_t svrecpx_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_z))) svfloat16_t svrecpx_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u8))) svuint8_t svrev_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u32))) svuint32_t svrev_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u64))) svuint64_t svrev_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u16))) svuint16_t svrev_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s8))) svint8_t svrev_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f64))) svfloat64_t svrev_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f32))) svfloat32_t svrev_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f16))) svfloat16_t svrev_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s32))) svint32_t svrev_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s64))) svint64_t svrev_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s16))) svint16_t svrev_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b16))) svbool_t svrev_b16(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b32))) svbool_t svrev_b32(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b64))) svbool_t svrev_b64(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_b8))) svbool_t svrev_b8(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_m))) svuint32_t svrevb_u32_m(svuint32_t, svbool_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_m))) svuint64_t svrevb_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_m))) svuint16_t svrevb_u16_m(svuint16_t, svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_m))) svint32_t svrevb_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_m))) svint64_t svrevb_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_m))) svint16_t svrevb_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_x))) svuint32_t svrevb_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_x))) svuint64_t svrevb_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_x))) svuint16_t svrevb_u16_x(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_x))) svint32_t svrevb_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_x))) svint64_t svrevb_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_x))) svint16_t svrevb_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_z))) svuint32_t svrevb_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_z))) svuint64_t svrevb_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_z))) svuint16_t svrevb_u16_z(svbool_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_z))) svint32_t svrevb_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_z))) svint64_t svrevb_s64_z(svbool_t, 
svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_z))) svint16_t svrevb_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_m))) svuint32_t svrevh_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_m))) svuint64_t svrevh_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_m))) svint32_t svrevh_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_m))) svint64_t svrevh_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_x))) svuint32_t svrevh_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_x))) svuint64_t svrevh_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_x))) svint32_t svrevh_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_x))) svint64_t svrevh_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_z))) svuint32_t svrevh_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_z))) svuint64_t svrevh_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_z))) svint32_t svrevh_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_z))) svint64_t svrevh_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_m))) svuint64_t svrevw_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_m))) svint64_t svrevw_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_x))) svuint64_t 
svrevw_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_x))) svint64_t svrevw_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_z))) svuint64_t svrevw_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_z))) svint64_t svrevw_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_m))) svfloat64_t svrinta_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_m))) svfloat32_t svrinta_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_m))) svfloat16_t svrinta_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_x))) svfloat64_t svrinta_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x))) svfloat32_t svrinta_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_x))) svfloat16_t svrinta_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_z))) svfloat64_t svrinta_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_z))) svfloat32_t svrinta_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_z))) svfloat16_t svrinta_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_m))) svfloat64_t svrinti_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_m))) svfloat32_t svrinti_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_m))) svfloat16_t svrinti_f16_m(svfloat16_t, svbool_t, 
svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_x))) svfloat64_t svrinti_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_x))) svfloat32_t svrinti_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_x))) svfloat16_t svrinti_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_z))) svfloat64_t svrinti_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_z))) svfloat32_t svrinti_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_z))) svfloat16_t svrinti_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_m))) svfloat64_t svrintm_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_m))) svfloat32_t svrintm_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_m))) svfloat16_t svrintm_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_x))) svfloat64_t svrintm_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x))) svfloat32_t svrintm_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_x))) svfloat16_t svrintm_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_z))) svfloat64_t svrintm_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_z))) svfloat32_t svrintm_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_z))) svfloat16_t svrintm_f16_z(svbool_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_m))) svfloat64_t svrintn_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_m))) svfloat32_t svrintn_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_m))) svfloat16_t svrintn_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_x))) svfloat64_t svrintn_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x))) svfloat32_t svrintn_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_x))) svfloat16_t svrintn_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_z))) svfloat64_t svrintn_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_z))) svfloat32_t svrintn_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_z))) svfloat16_t svrintn_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_m))) svfloat64_t svrintp_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_m))) svfloat32_t svrintp_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_m))) svfloat16_t svrintp_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_x))) svfloat64_t svrintp_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x))) svfloat32_t svrintp_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_x))) svfloat16_t svrintp_f16_x(svbool_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_z))) svfloat64_t svrintp_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_z))) svfloat32_t svrintp_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_z))) svfloat16_t svrintp_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_m))) svfloat64_t svrintx_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_m))) svfloat32_t svrintx_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_m))) svfloat16_t svrintx_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_x))) svfloat64_t svrintx_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_x))) svfloat32_t svrintx_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_x))) svfloat16_t svrintx_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_z))) svfloat64_t svrintx_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_z))) svfloat32_t svrintx_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_z))) svfloat16_t svrintx_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_m))) svfloat64_t svrintz_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_m))) svfloat32_t svrintz_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_m))) svfloat16_t svrintz_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_x))) svfloat64_t svrintz_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_x))) svfloat32_t svrintz_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_x))) svfloat16_t svrintz_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_z))) svfloat64_t svrintz_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_z))) svfloat32_t svrintz_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_z))) svfloat16_t svrintz_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f64))) svfloat64_t svrsqrte_f64(svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f32))) svfloat32_t svrsqrte_f32(svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f16))) svfloat16_t svrsqrte_f16(svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f64))) svfloat64_t svrsqrts_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f32))) svfloat32_t svrsqrts_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f16))) svfloat16_t svrsqrts_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_m))) svfloat64_t svscale_n_f64_m(svbool_t, svfloat64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_m))) svfloat32_t svscale_n_f32_m(svbool_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_m))) svfloat16_t svscale_n_f16_m(svbool_t, svfloat16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_x))) svfloat64_t 
svscale_n_f64_x(svbool_t, svfloat64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_x))) svfloat32_t svscale_n_f32_x(svbool_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_x))) svfloat16_t svscale_n_f16_x(svbool_t, svfloat16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_z))) svfloat64_t svscale_n_f64_z(svbool_t, svfloat64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_z))) svfloat32_t svscale_n_f32_z(svbool_t, svfloat32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_z))) svfloat16_t svscale_n_f16_z(svbool_t, svfloat16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_m))) svfloat64_t svscale_f64_m(svbool_t, svfloat64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_m))) svfloat32_t svscale_f32_m(svbool_t, svfloat32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_m))) svfloat16_t svscale_f16_m(svbool_t, svfloat16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x))) svfloat64_t svscale_f64_x(svbool_t, svfloat64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x))) svfloat32_t svscale_f32_x(svbool_t, svfloat32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x))) svfloat16_t svscale_f16_x(svbool_t, svfloat16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_z))) svfloat64_t svscale_f64_z(svbool_t, svfloat64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_z))) svfloat32_t svscale_f32_z(svbool_t, svfloat32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_z))) svfloat16_t svscale_f16_z(svbool_t, svfloat16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_b))) svbool_t svsel_b(svbool_t, svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8))) svuint8_t svsel_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32))) svuint32_t svsel_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64))) svuint64_t svsel_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16))) svuint16_t svsel_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8))) svint8_t svsel_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64))) svfloat64_t svsel_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32))) svfloat32_t svsel_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16))) svfloat16_t svsel_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32))) svint32_t svsel_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64))) svint64_t svsel_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16))) svint16_t svsel_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u8))) svuint8x2_t svset2_u8(svuint8x2_t, uint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u32))) svuint32x2_t svset2_u32(svuint32x2_t, uint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u64))) svuint64x2_t svset2_u64(svuint64x2_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u16))) 
svuint16x2_t svset2_u16(svuint16x2_t, uint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s8))) svint8x2_t svset2_s8(svint8x2_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f64))) svfloat64x2_t svset2_f64(svfloat64x2_t, uint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f32))) svfloat32x2_t svset2_f32(svfloat32x2_t, uint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f16))) svfloat16x2_t svset2_f16(svfloat16x2_t, uint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s32))) svint32x2_t svset2_s32(svint32x2_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s64))) svint64x2_t svset2_s64(svint64x2_t, uint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s16))) svint16x2_t svset2_s16(svint16x2_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u8))) svuint8x3_t svset3_u8(svuint8x3_t, uint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u32))) svuint32x3_t svset3_u32(svuint32x3_t, uint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u64))) svuint64x3_t svset3_u64(svuint64x3_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u16))) svuint16x3_t svset3_u16(svuint16x3_t, uint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s8))) svint8x3_t svset3_s8(svint8x3_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f64))) svfloat64x3_t svset3_f64(svfloat64x3_t, uint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f32))) svfloat32x3_t svset3_f32(svfloat32x3_t, uint64_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f16))) svfloat16x3_t svset3_f16(svfloat16x3_t, uint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s32))) svint32x3_t svset3_s32(svint32x3_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s64))) svint64x3_t svset3_s64(svint64x3_t, uint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s16))) svint16x3_t svset3_s16(svint16x3_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u8))) svuint8x4_t svset4_u8(svuint8x4_t, uint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u32))) svuint32x4_t svset4_u32(svuint32x4_t, uint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u64))) svuint64x4_t svset4_u64(svuint64x4_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u16))) svuint16x4_t svset4_u16(svuint16x4_t, uint64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s8))) svint8x4_t svset4_s8(svint8x4_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f64))) svfloat64x4_t svset4_f64(svfloat64x4_t, uint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f32))) svfloat32x4_t svset4_f32(svfloat32x4_t, uint64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f16))) svfloat16x4_t svset4_f16(svfloat16x4_t, uint64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s32))) svint32x4_t svset4_s32(svint32x4_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s64))) svint64x4_t svset4_s64(svint64x4_t, uint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s16))) svint16x4_t svset4_s16(svint16x4_t, uint64_t, 
svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsetffr))) void svsetffr(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u8))) svuint8_t svsplice_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u32))) svuint32_t svsplice_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u64))) svuint64_t svsplice_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u16))) svuint16_t svsplice_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s8))) svint8_t svsplice_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f64))) svfloat64_t svsplice_f64(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f32))) svfloat32_t svsplice_f32(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f16))) svfloat16_t svsplice_f16(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s32))) svint32_t svsplice_s32(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s64))) svint64_t svsplice_s64(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s16))) svint16_t svsplice_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_m))) svfloat64_t svsqrt_f64_m(svfloat64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_m))) svfloat32_t svsqrt_f32_m(svfloat32_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_m))) svfloat16_t svsqrt_f16_m(svfloat16_t, svbool_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_x))) svfloat64_t svsqrt_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_x))) svfloat32_t svsqrt_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_x))) svfloat16_t svsqrt_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_z))) svfloat64_t svsqrt_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_z))) svfloat32_t svsqrt_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_z))) svfloat16_t svsqrt_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8))) void svst1_u8(svbool_t, uint8_t *, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32))) void svst1_u32(svbool_t, uint32_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64))) void svst1_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16))) void svst1_u16(svbool_t, uint16_t *, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8))) void svst1_s8(svbool_t, int8_t *, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64))) void svst1_f64(svbool_t, float64_t *, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32))) void svst1_f32(svbool_t, float32_t *, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16))) void svst1_f16(svbool_t, float16_t *, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32))) void svst1_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64))) void svst1_s64(svbool_t, int64_t *, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16))) void svst1_s16(svbool_t, int16_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_u32))) void svst1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_u64))) void svst1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_f64))) void svst1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_f32))) void svst1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_s32))) void svst1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_s64))) void svst1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_u32))) void svst1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_u64))) void svst1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_f64))) void svst1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_f32))) void svst1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_s32))) void 
svst1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_s64))) void svst1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_u32))) void svst1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_u64))) void svst1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_f64))) void svst1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_f32))) void svst1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_s32))) void svst1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_s64))) void svst1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_u32))) void svst1_scatter_s32index_u32(svbool_t, uint32_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_f32))) void svst1_scatter_s32index_f32(svbool_t, float32_t *, svint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_s32))) void svst1_scatter_s32index_s32(svbool_t, int32_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_u32))) void svst1_scatter_u32index_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_f32))) void svst1_scatter_u32index_f32(svbool_t, float32_t 
*, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_s32))) void svst1_scatter_u32index_s32(svbool_t, int32_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_u64))) void svst1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_f64))) void svst1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_s64))) void svst1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_u64))) void svst1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_f64))) void svst1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_s64))) void svst1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_u32))) void svst1_scatter_s32offset_u32(svbool_t, uint32_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_f32))) void svst1_scatter_s32offset_f32(svbool_t, float32_t *, svint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_s32))) void svst1_scatter_s32offset_s32(svbool_t, int32_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_u32))) void svst1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_f32))) void 
svst1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_s32))) void svst1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_u64))) void svst1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_f64))) void svst1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_s64))) void svst1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_u64))) void svst1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_f64))) void svst1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_s64))) void svst1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8))) void svst1_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32))) void svst1_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64))) void svst1_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16))) void svst1_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8))) void svst1_vnum_s8(svbool_t, int8_t *, int64_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64))) void svst1_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32))) void svst1_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16))) void svst1_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32))) void svst1_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64))) void svst1_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16))) void svst1_vnum_s16(svbool_t, int16_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s32))) void svst1b_s32(svbool_t, int8_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s64))) void svst1b_s64(svbool_t, int8_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s16))) void svst1b_s16(svbool_t, int8_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u32))) void svst1b_u32(svbool_t, uint8_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u64))) void svst1b_u64(svbool_t, uint8_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u16))) void svst1b_u16(svbool_t, uint8_t *, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_u32))) void svst1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_u64))) void svst1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_s32))) void svst1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_s64))) void svst1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_u32))) void svst1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_u64))) void svst1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_s32))) void svst1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_s64))) void svst1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_s32))) void svst1b_scatter_s32offset_s32(svbool_t, int8_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_u32))) void svst1b_scatter_s32offset_u32(svbool_t, uint8_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_s32))) void svst1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_u32))) void svst1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_s64))) void svst1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_u64))) void svst1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, 
svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_s64))) void svst1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_u64))) void svst1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s32))) void svst1b_vnum_s32(svbool_t, int8_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s64))) void svst1b_vnum_s64(svbool_t, int8_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s16))) void svst1b_vnum_s16(svbool_t, int8_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u32))) void svst1b_vnum_u32(svbool_t, uint8_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u64))) void svst1b_vnum_u64(svbool_t, uint8_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u16))) void svst1b_vnum_u16(svbool_t, uint8_t *, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s32))) void svst1h_s32(svbool_t, int16_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s64))) void svst1h_s64(svbool_t, int16_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u32))) void svst1h_u32(svbool_t, uint16_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u64))) void svst1h_u64(svbool_t, uint16_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_u32))) void svst1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_u64))) void 
svst1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_s32))) void svst1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_s64))) void svst1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_u32))) void svst1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_u64))) void svst1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_s32))) void svst1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_s64))) void svst1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_u32))) void svst1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_u64))) void svst1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_s32))) void svst1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_s64))) void svst1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_s32))) void svst1h_scatter_s32index_s32(svbool_t, int16_t *, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_u32))) void svst1h_scatter_s32index_u32(svbool_t, uint16_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_s32))) void svst1h_scatter_u32index_s32(svbool_t, int16_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_u32))) void svst1h_scatter_u32index_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_s64))) void svst1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_u64))) void svst1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_s64))) void svst1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_u64))) void svst1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_s32))) void svst1h_scatter_s32offset_s32(svbool_t, int16_t *, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_u32))) void svst1h_scatter_s32offset_u32(svbool_t, uint16_t *, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_s32))) void svst1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_u32))) void svst1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_s64))) void 
svst1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_u64))) void svst1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_s64))) void svst1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_u64))) void svst1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s32))) void svst1h_vnum_s32(svbool_t, int16_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s64))) void svst1h_vnum_s64(svbool_t, int16_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u32))) void svst1h_vnum_u32(svbool_t, uint16_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u64))) void svst1h_vnum_u64(svbool_t, uint16_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_s64))) void svst1w_s64(svbool_t, int32_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_u64))) void svst1w_u64(svbool_t, uint32_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_u64))) void svst1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_s64))) void svst1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_u64))) void svst1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_s64))) void svst1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_u64))) void svst1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_s64))) void svst1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_s64))) void svst1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_u64))) void svst1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_s64))) void svst1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_u64))) void svst1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_s64))) void svst1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_u64))) void svst1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_s64))) void svst1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_u64))) void svst1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_s64))) void svst1w_vnum_s64(svbool_t, int32_t *, int64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_u64))) void svst1w_vnum_u64(svbool_t, uint32_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u8))) void svst2_u8(svbool_t, uint8_t *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u32))) void svst2_u32(svbool_t, uint32_t *, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u64))) void svst2_u64(svbool_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u16))) void svst2_u16(svbool_t, uint16_t *, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s8))) void svst2_s8(svbool_t, int8_t *, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f64))) void svst2_f64(svbool_t, float64_t *, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f32))) void svst2_f32(svbool_t, float32_t *, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f16))) void svst2_f16(svbool_t, float16_t *, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s32))) void svst2_s32(svbool_t, int32_t *, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s64))) void svst2_s64(svbool_t, int64_t *, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s16))) void svst2_s16(svbool_t, int16_t *, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u8))) void svst2_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u32))) void svst2_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u64))) void svst2_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x2_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u16))) void svst2_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s8))) void svst2_vnum_s8(svbool_t, int8_t *, int64_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f64))) void svst2_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f32))) void svst2_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f16))) void svst2_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s32))) void svst2_vnum_s32(svbool_t, int32_t *, int64_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s64))) void svst2_vnum_s64(svbool_t, int64_t *, int64_t, svint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s16))) void svst2_vnum_s16(svbool_t, int16_t *, int64_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u8))) void svst3_u8(svbool_t, uint8_t *, svuint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u32))) void svst3_u32(svbool_t, uint32_t *, svuint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u64))) void svst3_u64(svbool_t, uint64_t *, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u16))) void svst3_u16(svbool_t, uint16_t *, svuint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s8))) void svst3_s8(svbool_t, int8_t *, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f64))) void svst3_f64(svbool_t, float64_t *, svfloat64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f32))) void svst3_f32(svbool_t, float32_t *, 
svfloat32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f16))) void svst3_f16(svbool_t, float16_t *, svfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s32))) void svst3_s32(svbool_t, int32_t *, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s64))) void svst3_s64(svbool_t, int64_t *, svint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s16))) void svst3_s16(svbool_t, int16_t *, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u8))) void svst3_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u32))) void svst3_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u64))) void svst3_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u16))) void svst3_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s8))) void svst3_vnum_s8(svbool_t, int8_t *, int64_t, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f64))) void svst3_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f32))) void svst3_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f16))) void svst3_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s32))) void svst3_vnum_s32(svbool_t, int32_t *, int64_t, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s64))) void svst3_vnum_s64(svbool_t, int64_t *, int64_t, svint64x3_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s16))) void svst3_vnum_s16(svbool_t, int16_t *, int64_t, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u8))) void svst4_u8(svbool_t, uint8_t *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u32))) void svst4_u32(svbool_t, uint32_t *, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u64))) void svst4_u64(svbool_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u16))) void svst4_u16(svbool_t, uint16_t *, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s8))) void svst4_s8(svbool_t, int8_t *, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f64))) void svst4_f64(svbool_t, float64_t *, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f32))) void svst4_f32(svbool_t, float32_t *, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f16))) void svst4_f16(svbool_t, float16_t *, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s32))) void svst4_s32(svbool_t, int32_t *, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s64))) void svst4_s64(svbool_t, int64_t *, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s16))) void svst4_s16(svbool_t, int16_t *, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u8))) void svst4_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u32))) void svst4_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u64))) void svst4_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u16))) void svst4_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s8))) void svst4_vnum_s8(svbool_t, int8_t *, int64_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f64))) void svst4_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f32))) void svst4_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f16))) void svst4_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s32))) void svst4_vnum_s32(svbool_t, int32_t *, int64_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s64))) void svst4_vnum_s64(svbool_t, int64_t *, int64_t, svint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s16))) void svst4_vnum_s16(svbool_t, int16_t *, int64_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8))) void svstnt1_u8(svbool_t, uint8_t *, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32))) void svstnt1_u32(svbool_t, uint32_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64))) void svstnt1_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16))) void svstnt1_u16(svbool_t, uint16_t *, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8))) void svstnt1_s8(svbool_t, int8_t *, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64))) void svstnt1_f64(svbool_t, float64_t *, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32))) void svstnt1_f32(svbool_t, 
float32_t *, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16))) void svstnt1_f16(svbool_t, float16_t *, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32))) void svstnt1_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64))) void svstnt1_s64(svbool_t, int64_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16))) void svstnt1_s16(svbool_t, int16_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8))) void svstnt1_vnum_u8(svbool_t, uint8_t *, int64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32))) void svstnt1_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64))) void svstnt1_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16))) void svstnt1_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8))) void svstnt1_vnum_s8(svbool_t, int8_t *, int64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64))) void svstnt1_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32))) void svstnt1_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16))) void svstnt1_vnum_f16(svbool_t, float16_t *, int64_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32))) void svstnt1_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64))) void svstnt1_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16))) void svstnt1_vnum_s16(svbool_t, int16_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_m))) svfloat64_t svsub_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_m))) svfloat32_t svsub_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_m))) svfloat16_t svsub_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_x))) svfloat64_t svsub_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_x))) svfloat32_t svsub_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_x))) svfloat16_t svsub_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_z))) svfloat64_t svsub_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_z))) svfloat32_t svsub_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_z))) svfloat16_t svsub_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_m))) svuint8_t svsub_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_m))) svuint32_t svsub_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_m))) svuint64_t svsub_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_m))) svuint16_t svsub_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_m))) 
svint8_t svsub_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_m))) svint32_t svsub_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_m))) svint64_t svsub_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_m))) svint16_t svsub_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_x))) svuint8_t svsub_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_x))) svuint32_t svsub_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_x))) svuint64_t svsub_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_x))) svuint16_t svsub_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_x))) svint8_t svsub_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_x))) svint32_t svsub_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_x))) svint64_t svsub_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_x))) svint16_t svsub_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_z))) svuint8_t svsub_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_z))) svuint32_t svsub_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_z))) svuint64_t svsub_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_z))) svuint16_t 
svsub_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_z))) svint8_t svsub_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_z))) svint32_t svsub_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_z))) svint64_t svsub_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_z))) svint16_t svsub_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_m))) svfloat64_t svsub_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_m))) svfloat32_t svsub_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_m))) svfloat16_t svsub_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_x))) svfloat64_t svsub_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_x))) svfloat32_t svsub_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_x))) svfloat16_t svsub_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_z))) svfloat64_t svsub_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_z))) svfloat32_t svsub_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_z))) svfloat16_t svsub_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_m))) svuint8_t svsub_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_m))) 
svuint32_t svsub_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_m))) svuint64_t svsub_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_m))) svuint16_t svsub_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_m))) svint8_t svsub_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_m))) svint32_t svsub_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_m))) svint64_t svsub_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_m))) svint16_t svsub_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_x))) svuint8_t svsub_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_x))) svuint32_t svsub_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_x))) svuint64_t svsub_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_x))) svuint16_t svsub_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_x))) svint8_t svsub_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_x))) svint32_t svsub_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_x))) svint64_t svsub_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_x))) svint16_t svsub_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_z))) svuint8_t svsub_u8_z(svbool_t, svuint8_t, 
svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_z))) svuint32_t svsub_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_z))) svuint64_t svsub_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_z))) svuint16_t svsub_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_z))) svint8_t svsub_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_z))) svint32_t svsub_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_z))) svint64_t svsub_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_z))) svint16_t svsub_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_m))) svfloat64_t svsubr_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_m))) svfloat32_t svsubr_n_f32_m(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_m))) svfloat16_t svsubr_n_f16_m(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_x))) svfloat64_t svsubr_n_f64_x(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_x))) svfloat32_t svsubr_n_f32_x(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_x))) svfloat16_t svsubr_n_f16_x(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_z))) svfloat64_t svsubr_n_f64_z(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_z))) svfloat32_t 
svsubr_n_f32_z(svbool_t, svfloat32_t, float32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_z))) svfloat16_t svsubr_n_f16_z(svbool_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_m))) svuint8_t svsubr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_m))) svuint32_t svsubr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_m))) svuint64_t svsubr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_m))) svuint16_t svsubr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_m))) svint8_t svsubr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_m))) svint32_t svsubr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_m))) svint64_t svsubr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_m))) svint16_t svsubr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_x))) svuint8_t svsubr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_x))) svuint32_t svsubr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_x))) svuint64_t svsubr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_x))) svuint16_t svsubr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_x))) svint8_t svsubr_n_s8_x(svbool_t, svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_x))) svint32_t svsubr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_x))) svint64_t svsubr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_x))) svint16_t svsubr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_z))) svuint8_t svsubr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_z))) svuint32_t svsubr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_z))) svuint64_t svsubr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_z))) svuint16_t svsubr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_z))) svint8_t svsubr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_z))) svint32_t svsubr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_z))) svint64_t svsubr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_z))) svint16_t svsubr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_m))) svfloat64_t svsubr_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_m))) svfloat32_t svsubr_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_m))) svfloat16_t svsubr_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_x))) svfloat64_t svsubr_f64_x(svbool_t, 
svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_x))) svfloat32_t svsubr_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_x))) svfloat16_t svsubr_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_z))) svfloat64_t svsubr_f64_z(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_z))) svfloat32_t svsubr_f32_z(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_z))) svfloat16_t svsubr_f16_z(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_m))) svuint8_t svsubr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_m))) svuint32_t svsubr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_m))) svuint64_t svsubr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_m))) svuint16_t svsubr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_m))) svint8_t svsubr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_m))) svint32_t svsubr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_m))) svint64_t svsubr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_m))) svint16_t svsubr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_x))) svuint8_t svsubr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_x))) svuint32_t 
svsubr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_x))) svuint64_t svsubr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_x))) svuint16_t svsubr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_x))) svint8_t svsubr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_x))) svint32_t svsubr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_x))) svint64_t svsubr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_x))) svint16_t svsubr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_z))) svuint8_t svsubr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_z))) svuint32_t svsubr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_z))) svuint64_t svsubr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_z))) svuint16_t svsubr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_z))) svint8_t svsubr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_z))) svint32_t svsubr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_z))) svint64_t svsubr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_z))) svint16_t svsubr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u8))) svuint8_t 
svtbl_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u32))) svuint32_t svtbl_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u64))) svuint64_t svtbl_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u16))) svuint16_t svtbl_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s8))) svint8_t svtbl_s8(svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f64))) svfloat64_t svtbl_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f32))) svfloat32_t svtbl_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f16))) svfloat16_t svtbl_f16(svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s32))) svint32_t svtbl_s32(svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s64))) svint64_t svtbl_s64(svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s16))) svint16_t svtbl_s16(svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f64))) svfloat64_t svtmad_f64(svfloat64_t, svfloat64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f32))) svfloat32_t svtmad_f32(svfloat32_t, svfloat32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f16))) svfloat16_t svtmad_f16(svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u8))) svuint8_t svtrn1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u32))) svuint32_t svtrn1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u64))) svuint64_t svtrn1_u64(svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u16))) svuint16_t svtrn1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s8))) svint8_t svtrn1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f64))) svfloat64_t svtrn1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f32))) svfloat32_t svtrn1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f16))) svfloat16_t svtrn1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s32))) svint32_t svtrn1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s64))) svint64_t svtrn1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s16))) svint16_t svtrn1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b16))) svbool_t svtrn1_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b32))) svbool_t svtrn1_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b64))) svbool_t svtrn1_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_b8))) svbool_t svtrn1_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u8))) svuint8_t svtrn2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u32))) svuint32_t svtrn2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u64))) svuint64_t svtrn2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u16))) svuint16_t svtrn2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s8))) svint8_t svtrn2_s8(svint8_t, 
svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f64))) svfloat64_t svtrn2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f32))) svfloat32_t svtrn2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f16))) svfloat16_t svtrn2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s32))) svint32_t svtrn2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s64))) svint64_t svtrn2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s16))) svint16_t svtrn2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b16))) svbool_t svtrn2_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b32))) svbool_t svtrn2_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b64))) svbool_t svtrn2_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_b8))) svbool_t svtrn2_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f64))) svfloat64_t svtsmul_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f32))) svfloat32_t svtsmul_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f16))) svfloat16_t svtsmul_f16(svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f64))) svfloat64_t svtssel_f64(svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f32))) svfloat32_t svtssel_f32(svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f16))) svfloat16_t svtssel_f16(svfloat16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u8))) svuint8x2_t svundef2_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u32))) svuint32x2_t svundef2_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u64))) svuint64x2_t svundef2_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u16))) svuint16x2_t svundef2_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s8))) svint8x2_t svundef2_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f64))) svfloat64x2_t svundef2_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f32))) svfloat32x2_t svundef2_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f16))) svfloat16x2_t svundef2_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s32))) svint32x2_t svundef2_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s64))) svint64x2_t svundef2_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s16))) svint16x2_t svundef2_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u8))) svuint8x3_t svundef3_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u32))) svuint32x3_t svundef3_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u64))) svuint64x3_t svundef3_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u16))) svuint16x3_t svundef3_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s8))) svint8x3_t svundef3_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f64))) svfloat64x3_t svundef3_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f32))) svfloat32x3_t svundef3_f32(void); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f16))) svfloat16x3_t svundef3_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s32))) svint32x3_t svundef3_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s64))) svint64x3_t svundef3_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s16))) svint16x3_t svundef3_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u8))) svuint8x4_t svundef4_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u32))) svuint32x4_t svundef4_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u64))) svuint64x4_t svundef4_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u16))) svuint16x4_t svundef4_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s8))) svint8x4_t svundef4_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f64))) svfloat64x4_t svundef4_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f32))) svfloat32x4_t svundef4_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f16))) svfloat16x4_t svundef4_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s32))) svint32x4_t svundef4_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s64))) svint64x4_t svundef4_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s16))) svint16x4_t svundef4_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u8))) svuint8_t svundef_u8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u32))) svuint32_t svundef_u32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u64))) svuint64_t svundef_u64(void); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u16))) svuint16_t svundef_u16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s8))) svint8_t svundef_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f64))) svfloat64_t svundef_f64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f32))) svfloat32_t svundef_f32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f16))) svfloat16_t svundef_f16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s32))) svint32_t svundef_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s64))) svint64_t svundef_s64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s16))) svint16_t svundef_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_b))) svbool_t svunpkhi_b(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s32))) svint32_t svunpkhi_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s64))) svint64_t svunpkhi_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s16))) svint16_t svunpkhi_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u32))) svuint32_t svunpkhi_u32(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u64))) svuint64_t svunpkhi_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u16))) svuint16_t svunpkhi_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_b))) svbool_t svunpklo_b(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s32))) svint32_t svunpklo_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s64))) svint64_t svunpklo_s64(svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s16))) svint16_t svunpklo_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u32))) svuint32_t svunpklo_u32(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u64))) svuint64_t svunpklo_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u16))) svuint16_t svunpklo_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u8))) svuint8_t svuzp1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u32))) svuint32_t svuzp1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u64))) svuint64_t svuzp1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u16))) svuint16_t svuzp1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s8))) svint8_t svuzp1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f64))) svfloat64_t svuzp1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f32))) svfloat32_t svuzp1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f16))) svfloat16_t svuzp1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s32))) svint32_t svuzp1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s64))) svint64_t svuzp1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s16))) svint16_t svuzp1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b16))) svbool_t svuzp1_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b32))) svbool_t svuzp1_b32(svbool_t, svbool_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b64))) svbool_t svuzp1_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_b8))) svbool_t svuzp1_b8(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u8))) svuint8_t svuzp2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u32))) svuint32_t svuzp2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u64))) svuint64_t svuzp2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u16))) svuint16_t svuzp2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s8))) svint8_t svuzp2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f64))) svfloat64_t svuzp2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f32))) svfloat32_t svuzp2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f16))) svfloat16_t svuzp2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s32))) svint32_t svuzp2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s64))) svint64_t svuzp2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s16))) svint16_t svuzp2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b16))) svbool_t svuzp2_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b32))) svbool_t svuzp2_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b64))) svbool_t svuzp2_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_b8))) svbool_t svuzp2_b8(svbool_t, svbool_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s32))) svbool_t svwhilele_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s32))) svbool_t svwhilele_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s32))) svbool_t svwhilele_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s32))) svbool_t svwhilele_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64))) svbool_t svwhilele_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64))) svbool_t svwhilele_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64))) svbool_t svwhilele_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64))) svbool_t svwhilele_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u32))) svbool_t svwhilele_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u32))) svbool_t svwhilele_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u32))) svbool_t svwhilele_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u32))) svbool_t svwhilele_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64))) svbool_t svwhilele_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64))) svbool_t svwhilele_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64))) svbool_t svwhilele_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64))) 
svbool_t svwhilele_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u32))) svbool_t svwhilelt_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u32))) svbool_t svwhilelt_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u32))) svbool_t svwhilelt_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u32))) svbool_t svwhilelt_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64))) svbool_t svwhilelt_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64))) svbool_t svwhilelt_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64))) svbool_t svwhilelt_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64))) svbool_t svwhilelt_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s32))) svbool_t svwhilelt_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s32))) svbool_t svwhilelt_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s32))) svbool_t svwhilelt_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s32))) svbool_t svwhilelt_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64))) svbool_t svwhilelt_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64))) svbool_t svwhilelt_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64))) svbool_t svwhilelt_b64_s64(int64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64))) svbool_t svwhilelt_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwrffr))) void svwrffr(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u8))) svuint8_t svzip1_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u32))) svuint32_t svzip1_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u64))) svuint64_t svzip1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u16))) svuint16_t svzip1_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s8))) svint8_t svzip1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f64))) svfloat64_t svzip1_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f32))) svfloat32_t svzip1_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f16))) svfloat16_t svzip1_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s32))) svint32_t svzip1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s64))) svint64_t svzip1_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s16))) svint16_t svzip1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b16))) svbool_t svzip1_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b32))) svbool_t svzip1_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b64))) svbool_t svzip1_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_b8))) svbool_t svzip1_b8(svbool_t, svbool_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u8))) svuint8_t svzip2_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u32))) svuint32_t svzip2_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u64))) svuint64_t svzip2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u16))) svuint16_t svzip2_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s8))) svint8_t svzip2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f64))) svfloat64_t svzip2_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f32))) svfloat32_t svzip2_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f16))) svfloat16_t svzip2_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s32))) svint32_t svzip2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s64))) svint64_t svzip2_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s16))) svint16_t svzip2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b16))) svbool_t svzip2_b16(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b32))) svbool_t svzip2_b32(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b64))) svbool_t svzip2_b64(svbool_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_b8))) svbool_t svzip2_b8(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) svfloat64_t svabd_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m))) svfloat32_t 
svabd_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_m))) svfloat16_t svabd_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_x))) svfloat64_t svabd_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_x))) svfloat32_t svabd_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_x))) svfloat16_t svabd_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_z))) svfloat64_t svabd_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_z))) svfloat32_t svabd_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f16_z))) svfloat16_t svabd_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_m))) svint8_t svabd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_m))) svint32_t svabd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_m))) svint64_t svabd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_m))) svint16_t svabd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_x))) svint8_t svabd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_x))) svint32_t svabd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_x))) svint64_t svabd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_x))) svint16_t svabd_x(svbool_t, svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s8_z))) svint8_t svabd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s32_z))) svint32_t svabd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s64_z))) svint64_t svabd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_s16_z))) svint16_t svabd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_m))) svuint8_t svabd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_m))) svuint32_t svabd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_m))) svuint64_t svabd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_m))) svuint16_t svabd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_x))) svuint8_t svabd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_x))) svuint32_t svabd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_x))) svuint64_t svabd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_x))) svuint16_t svabd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u8_z))) svuint8_t svabd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u32_z))) svuint32_t svabd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u64_z))) svuint64_t svabd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_u16_z))) 
svuint16_t svabd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_m))) svfloat64_t svabd_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_m))) svfloat32_t svabd_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_m))) svfloat16_t svabd_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_x))) svfloat64_t svabd_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_x))) svfloat32_t svabd_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_x))) svfloat16_t svabd_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f64_z))) svfloat64_t svabd_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f32_z))) svfloat32_t svabd_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_f16_z))) svfloat16_t svabd_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_m))) svint8_t svabd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_m))) svint32_t svabd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_m))) svint64_t svabd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_m))) svint16_t svabd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_x))) svint8_t svabd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_x))) svint32_t svabd_x(svbool_t, svint32_t, 
svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_x))) svint64_t svabd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_x))) svint16_t svabd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s8_z))) svint8_t svabd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s32_z))) svint32_t svabd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s64_z))) svint64_t svabd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_s16_z))) svint16_t svabd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_m))) svuint8_t svabd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_m))) svuint32_t svabd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_m))) svuint64_t svabd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_m))) svuint16_t svabd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_x))) svuint8_t svabd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_x))) svuint32_t svabd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_x))) svuint64_t svabd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_x))) svuint16_t svabd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u8_z))) svuint8_t svabd_z(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u32_z))) svuint32_t svabd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u64_z))) svuint64_t svabd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_u16_z))) svuint16_t svabd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_m))) svfloat64_t svabs_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_m))) svfloat32_t svabs_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_m))) svfloat16_t svabs_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_x))) svfloat64_t svabs_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_x))) svfloat32_t svabs_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_x))) svfloat16_t svabs_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f64_z))) svfloat64_t svabs_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f32_z))) svfloat32_t svabs_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_f16_z))) svfloat16_t svabs_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_m))) svint8_t svabs_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_m))) svint32_t svabs_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_m))) svint64_t svabs_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_m))) svint16_t svabs_m(svint16_t, svbool_t, 
svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_x))) svint8_t svabs_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_x))) svint32_t svabs_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_x))) svint64_t svabs_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_x))) svint16_t svabs_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s8_z))) svint8_t svabs_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s32_z))) svint32_t svabs_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s64_z))) svint64_t svabs_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabs_s16_z))) svint16_t svabs_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f64))) svbool_t svacge(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f32))) svbool_t svacge(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_n_f16))) svbool_t svacge(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f64))) svbool_t svacge(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f32))) svbool_t svacge(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacge_f16))) svbool_t svacge(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f64))) svbool_t svacgt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f32))) svbool_t svacgt(svbool_t, svfloat32_t, float32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_n_f16))) svbool_t svacgt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f64))) svbool_t svacgt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f32))) svbool_t svacgt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacgt_f16))) svbool_t svacgt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f64))) svbool_t svacle(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f32))) svbool_t svacle(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_n_f16))) svbool_t svacle(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f64))) svbool_t svacle(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f32))) svbool_t svacle(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svacle_f16))) svbool_t svacle(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f64))) svbool_t svaclt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f32))) svbool_t svaclt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_n_f16))) svbool_t svaclt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f64))) svbool_t svaclt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f32))) svbool_t svaclt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaclt_f16))) svbool_t 
svaclt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_m))) svfloat64_t svadd_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_m))) svfloat32_t svadd_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_m))) svfloat16_t svadd_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_x))) svfloat64_t svadd_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_x))) svfloat32_t svadd_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_x))) svfloat16_t svadd_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f64_z))) svfloat64_t svadd_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f32_z))) svfloat32_t svadd_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_f16_z))) svfloat16_t svadd_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_m))) svuint8_t svadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_m))) svuint32_t svadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_m))) svuint64_t svadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_m))) svuint16_t svadd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_m))) svint8_t svadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_m))) svint32_t svadd_m(svbool_t, svint32_t, 
int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_m))) svint64_t svadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_m))) svint16_t svadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_x))) svuint8_t svadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_x))) svuint32_t svadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_x))) svuint64_t svadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_x))) svuint16_t svadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_x))) svint8_t svadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_x))) svint32_t svadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_x))) svint64_t svadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_x))) svint16_t svadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u8_z))) svuint8_t svadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u32_z))) svuint32_t svadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u64_z))) svuint64_t svadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_u16_z))) svuint16_t svadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s8_z))) svint8_t svadd_z(svbool_t, svint8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s32_z))) svint32_t svadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s64_z))) svint64_t svadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_s16_z))) svint16_t svadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_m))) svfloat64_t svadd_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_m))) svfloat32_t svadd_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_m))) svfloat16_t svadd_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_x))) svfloat64_t svadd_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_x))) svfloat32_t svadd_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_x))) svfloat16_t svadd_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f64_z))) svfloat64_t svadd_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f32_z))) svfloat32_t svadd_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_f16_z))) svfloat16_t svadd_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_m))) svuint8_t svadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_m))) svuint32_t svadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_m))) svuint64_t svadd_m(svbool_t, svuint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_m))) svuint16_t svadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_m))) svint8_t svadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_m))) svint32_t svadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_m))) svint64_t svadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_m))) svint16_t svadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_x))) svuint8_t svadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_x))) svuint32_t svadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_x))) svuint64_t svadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_x))) svuint16_t svadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_x))) svint8_t svadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_x))) svint32_t svadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_x))) svint64_t svadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_x))) svint16_t svadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u8_z))) svuint8_t svadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u32_z))) svuint32_t svadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u64_z))) svuint64_t 
svadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_u16_z))) svuint16_t svadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s8_z))) svint8_t svadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s32_z))) svint32_t svadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_z))) svint64_t svadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_z))) svint16_t svadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) float64_t svadda(svbool_t, float64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) float32_t svadda(svbool_t, float32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) float16_t svadda(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s8))) int64_t svaddv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s32))) int64_t svaddv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s64))) int64_t svaddv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s16))) int64_t svaddv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u8))) uint64_t svaddv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u32))) uint64_t svaddv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u64))) uint64_t svaddv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_u16))) uint64_t svaddv(svbool_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f64))) float64_t svaddv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f32))) float32_t svaddv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_f16))) float16_t svaddv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_u32offset))) svuint32_t svadrb_offset(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_u64offset))) svuint64_t svadrb_offset(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_s32offset))) svuint32_t svadrb_offset(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_s64offset))) svuint64_t svadrb_offset(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_u32index))) svuint32_t svadrd_index(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_u64index))) svuint64_t svadrd_index(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u32base_s32index))) svuint32_t svadrd_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrd_u64base_s64index))) svuint64_t svadrd_index(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_u32index))) svuint32_t svadrh_index(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_u64index))) svuint64_t svadrh_index(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u32base_s32index))) svuint32_t svadrh_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrh_u64base_s64index))) svuint64_t svadrh_index(svuint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_u32index))) svuint32_t svadrw_index(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_u64index))) svuint64_t svadrw_index(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32index))) svuint32_t svadrw_index(svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index))) svuint64_t svadrw_index(svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_b_z))) svbool_t svand_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_m))) svuint8_t svand_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_m))) svuint32_t svand_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_m))) svuint64_t svand_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_m))) svuint16_t svand_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_m))) svint8_t svand_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_m))) svint32_t svand_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_m))) svint64_t svand_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_m))) svint16_t svand_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_x))) svuint8_t svand_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_x))) svuint32_t svand_x(svbool_t, svuint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_x))) svuint64_t svand_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_x))) svuint16_t svand_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_x))) svint8_t svand_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_x))) svint32_t svand_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_x))) svint64_t svand_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_x))) svint16_t svand_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u8_z))) svuint8_t svand_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u32_z))) svuint32_t svand_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u64_z))) svuint64_t svand_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_u16_z))) svuint16_t svand_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s8_z))) svint8_t svand_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s32_z))) svint32_t svand_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s64_z))) svint64_t svand_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_n_s16_z))) svint16_t svand_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_m))) svuint8_t svand_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_m))) svuint32_t 
svand_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_m))) svuint64_t svand_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_m))) svuint16_t svand_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_m))) svint8_t svand_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_m))) svint32_t svand_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_m))) svint64_t svand_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_m))) svint16_t svand_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_x))) svuint8_t svand_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_x))) svuint32_t svand_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_x))) svuint64_t svand_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_x))) svuint16_t svand_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_x))) svint8_t svand_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_x))) svint32_t svand_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_x))) svint64_t svand_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_x))) svint16_t svand_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u8_z))) svuint8_t svand_z(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u32_z))) svuint32_t svand_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u64_z))) svuint64_t svand_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_u16_z))) svuint16_t svand_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s8_z))) svint8_t svand_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s32_z))) svint32_t svand_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s64_z))) svint64_t svand_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svand_s16_z))) svint16_t svand_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u8))) uint8_t svandv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u32))) uint32_t svandv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u64))) uint64_t svandv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_u16))) uint16_t svandv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s8))) int8_t svandv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s32))) int32_t svandv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s64))) int64_t svandv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandv_s16))) int16_t svandv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_m))) svint8_t svasr_m(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_m))) svint32_t 
svasr_m(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_m))) svint64_t svasr_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_m))) svint16_t svasr_m(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_x))) svint8_t svasr_x(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_x))) svint32_t svasr_x(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_x))) svint64_t svasr_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_x))) svint16_t svasr_x(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s8_z))) svint8_t svasr_z(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s32_z))) svint32_t svasr_z(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s64_z))) svint64_t svasr_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_n_s16_z))) svint16_t svasr_z(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_m))) svint8_t svasr_m(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_m))) svint32_t svasr_m(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_m))) svint64_t svasr_m(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_m))) svint16_t svasr_m(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_x))) svint8_t svasr_x(svbool_t, svint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_x))) svint32_t svasr_x(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_x))) svint64_t svasr_x(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_x))) svint16_t svasr_x(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s8_z))) svint8_t svasr_z(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s32_z))) svint32_t svasr_z(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s64_z))) svint64_t svasr_z(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_s16_z))) svint16_t svasr_z(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_m))) svint8_t svasr_wide_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_m))) svint32_t svasr_wide_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_m))) svint16_t svasr_wide_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_x))) svint8_t svasr_wide_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_x))) svint32_t svasr_wide_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_x))) svint16_t svasr_wide_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s8_z))) svint8_t svasr_wide_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s32_z))) svint32_t svasr_wide_z(svbool_t, svint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_n_s16_z))) svint16_t svasr_wide_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_m))) svint8_t svasr_wide_m(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_m))) svint32_t svasr_wide_m(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_m))) svint16_t svasr_wide_m(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_x))) svint8_t svasr_wide_x(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_x))) svint32_t svasr_wide_x(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_x))) svint16_t svasr_wide_x(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s8_z))) svint8_t svasr_wide_z(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s32_z))) svint32_t svasr_wide_z(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasr_wide_s16_z))) svint16_t svasr_wide_z(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_m))) svint8_t svasrd_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_m))) svint32_t svasrd_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_m))) svint64_t svasrd_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_m))) svint16_t svasrd_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_x))) svint8_t svasrd_x(svbool_t, svint8_t, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_x))) svint32_t svasrd_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_x))) svint64_t svasrd_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_x))) svint16_t svasrd_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s8_z))) svint8_t svasrd_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s32_z))) svint32_t svasrd_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s64_z))) svint64_t svasrd_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svasrd_n_s16_z))) svint16_t svasrd_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_b_z))) svbool_t svbic_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_m))) svuint8_t svbic_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_m))) svuint32_t svbic_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_m))) svuint64_t svbic_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_m))) svuint16_t svbic_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_m))) svint8_t svbic_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_m))) svint32_t svbic_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_m))) svint64_t svbic_m(svbool_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_m))) svint16_t svbic_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_x))) svuint8_t svbic_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_x))) svuint32_t svbic_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_x))) svuint64_t svbic_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_x))) svuint16_t svbic_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_x))) svint8_t svbic_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_x))) svint32_t svbic_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_x))) svint64_t svbic_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_x))) svint16_t svbic_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u8_z))) svuint8_t svbic_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u32_z))) svuint32_t svbic_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u64_z))) svuint64_t svbic_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_u16_z))) svuint16_t svbic_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s8_z))) svint8_t svbic_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s32_z))) svint32_t svbic_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s64_z))) svint64_t 
svbic_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_n_s16_z))) svint16_t svbic_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_m))) svuint8_t svbic_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_m))) svuint32_t svbic_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_m))) svuint64_t svbic_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_m))) svuint16_t svbic_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_m))) svint8_t svbic_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_m))) svint32_t svbic_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_m))) svint64_t svbic_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_m))) svint16_t svbic_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_x))) svuint8_t svbic_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_x))) svuint32_t svbic_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_x))) svuint64_t svbic_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_x))) svuint16_t svbic_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_x))) svint8_t svbic_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_x))) svint32_t svbic_x(svbool_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_x))) svint64_t svbic_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_x))) svint16_t svbic_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u8_z))) svuint8_t svbic_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u32_z))) svuint32_t svbic_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u64_z))) svuint64_t svbic_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_u16_z))) svuint16_t svbic_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s8_z))) svint8_t svbic_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s32_z))) svint32_t svbic_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s64_z))) svint64_t svbic_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbic_s16_z))) svint16_t svbic_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_m))) svbool_t svbrka_m(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrka_b_z))) svbool_t svbrka_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_m))) svbool_t svbrkb_m(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkb_b_z))) svbool_t svbrkb_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkn_b_z))) svbool_t svbrkn_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpa_b_z))) svbool_t svbrkpa_z(svbool_t, svbool_t, svbool_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbrkpb_b_z))) svbool_t svbrkpb_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_m))) svfloat64_t svcadd_m(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_m))) svfloat32_t svcadd_m(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_m))) svfloat16_t svcadd_m(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_x))) svfloat64_t svcadd_x(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_x))) svfloat32_t svcadd_x(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_x))) svfloat16_t svcadd_x(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f64_z))) svfloat64_t svcadd_z(svbool_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f32_z))) svfloat32_t svcadd_z(svbool_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_f16_z))) svfloat16_t svcadd_z(svbool_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u8))) uint8_t svclasta(svbool_t, uint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u32))) uint32_t svclasta(svbool_t, uint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u64))) uint64_t svclasta(svbool_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u16))) uint16_t svclasta(svbool_t, uint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s8))) int8_t svclasta(svbool_t, int8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f64))) float64_t svclasta(svbool_t, float64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f32))) float32_t svclasta(svbool_t, float32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f16))) float16_t svclasta(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s32))) int32_t svclasta(svbool_t, int32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s64))) int64_t svclasta(svbool_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s16))) int16_t svclasta(svbool_t, int16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u8))) svuint8_t svclasta(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u32))) svuint32_t svclasta(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u64))) svuint64_t svclasta(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u16))) svuint16_t svclasta(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s8))) svint8_t svclasta(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f64))) svfloat64_t svclasta(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f32))) svfloat32_t svclasta(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f16))) svfloat16_t svclasta(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s32))) svint32_t svclasta(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s64))) svint64_t svclasta(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s16))) svint16_t svclasta(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u8))) uint8_t svclastb(svbool_t, uint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u32))) uint32_t svclastb(svbool_t, uint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u64))) uint64_t svclastb(svbool_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u16))) uint16_t svclastb(svbool_t, uint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s8))) int8_t svclastb(svbool_t, int8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f64))) float64_t svclastb(svbool_t, float64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f32))) float32_t svclastb(svbool_t, float32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f16))) float16_t svclastb(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s32))) int32_t svclastb(svbool_t, int32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s64))) int64_t svclastb(svbool_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s16))) int16_t svclastb(svbool_t, int16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u8))) svuint8_t svclastb(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u32))) svuint32_t svclastb(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u64))) svuint64_t svclastb(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u16))) svuint16_t svclastb(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s8))) svint8_t svclastb(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f64))) svfloat64_t svclastb(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f32))) svfloat32_t svclastb(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f16))) svfloat16_t svclastb(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s32))) svint32_t svclastb(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s64))) svint64_t svclastb(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s16))) svint16_t svclastb(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_m))) svuint8_t svcls_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_m))) svuint32_t svcls_m(svuint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_m))) svuint64_t svcls_m(svuint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_m))) svuint16_t svcls_m(svuint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_x))) svuint8_t svcls_x(svbool_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_x))) svuint32_t svcls_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_x))) svuint64_t svcls_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_x))) svuint16_t svcls_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s8_z))) svuint8_t svcls_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s32_z))) svuint32_t svcls_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s64_z))) svuint64_t svcls_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcls_s16_z))) svuint16_t svcls_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_m))) svuint8_t svclz_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_m))) svuint32_t svclz_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_m))) svuint64_t svclz_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_m))) svuint16_t svclz_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_m))) svuint8_t svclz_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_m))) svuint32_t svclz_m(svuint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_m))) svuint64_t svclz_m(svuint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_m))) svuint16_t svclz_m(svuint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_x))) svuint8_t svclz_x(svbool_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_x))) svuint32_t svclz_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_x))) svuint64_t svclz_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_x))) svuint16_t svclz_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_x))) svuint8_t svclz_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_x))) svuint32_t svclz_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_x))) svuint64_t svclz_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_x))) svuint16_t svclz_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u8_z))) svuint8_t svclz_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u32_z))) svuint32_t svclz_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u64_z))) svuint64_t svclz_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_u16_z))) svuint16_t svclz_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s8_z))) svuint8_t svclz_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s32_z))) svuint32_t svclz_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s64_z))) svuint64_t svclz_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclz_s16_z))) svuint16_t svclz_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_m))) svfloat64_t svcmla_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_m))) svfloat32_t 
svcmla_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_m))) svfloat16_t svcmla_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_x))) svfloat64_t svcmla_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_x))) svfloat32_t svcmla_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_x))) svfloat16_t svcmla_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f64_z))) svfloat64_t svcmla_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f32_z))) svfloat32_t svcmla_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_f16_z))) svfloat16_t svcmla_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f32))) svfloat32_t svcmla_lane(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_f16))) svfloat16_t svcmla_lane(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f64))) svbool_t svcmpeq(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f32))) svbool_t svcmpeq(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_f16))) svbool_t svcmpeq(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u8))) svbool_t svcmpeq(svbool_t, 
svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u32))) svbool_t svcmpeq(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u64))) svbool_t svcmpeq(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_u16))) svbool_t svcmpeq(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s8))) svbool_t svcmpeq(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s32))) svbool_t svcmpeq(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s64))) svbool_t svcmpeq(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_n_s16))) svbool_t svcmpeq(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u8))) svbool_t svcmpeq(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u32))) svbool_t svcmpeq(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u64))) svbool_t svcmpeq(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_u16))) svbool_t svcmpeq(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s8))) svbool_t svcmpeq(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s32))) svbool_t svcmpeq(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s64))) svbool_t svcmpeq(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_s16))) svbool_t svcmpeq(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f64))) 
svbool_t svcmpeq(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f32))) svbool_t svcmpeq(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_f16))) svbool_t svcmpeq(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s8))) svbool_t svcmpeq_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s32))) svbool_t svcmpeq_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_n_s16))) svbool_t svcmpeq_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s8))) svbool_t svcmpeq_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s32))) svbool_t svcmpeq_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpeq_wide_s16))) svbool_t svcmpeq_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f64))) svbool_t svcmpge(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f32))) svbool_t svcmpge(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_f16))) svbool_t svcmpge(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s8))) svbool_t svcmpge(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s32))) svbool_t svcmpge(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s64))) svbool_t svcmpge(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_s16))) svbool_t svcmpge(svbool_t, 
svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u8))) svbool_t svcmpge(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u32))) svbool_t svcmpge(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u64))) svbool_t svcmpge(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_n_u16))) svbool_t svcmpge(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s8))) svbool_t svcmpge(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s32))) svbool_t svcmpge(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s64))) svbool_t svcmpge(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_s16))) svbool_t svcmpge(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f64))) svbool_t svcmpge(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f32))) svbool_t svcmpge(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_f16))) svbool_t svcmpge(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u8))) svbool_t svcmpge(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u32))) svbool_t svcmpge(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u64))) svbool_t svcmpge(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_u16))) svbool_t svcmpge(svbool_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s8))) svbool_t svcmpge_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s32))) svbool_t svcmpge_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_s16))) svbool_t svcmpge_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u8))) svbool_t svcmpge_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u32))) svbool_t svcmpge_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_n_u16))) svbool_t svcmpge_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s8))) svbool_t svcmpge_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s32))) svbool_t svcmpge_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_s16))) svbool_t svcmpge_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u8))) svbool_t svcmpge_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u32))) svbool_t svcmpge_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpge_wide_u16))) svbool_t svcmpge_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f64))) svbool_t svcmpgt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f32))) svbool_t svcmpgt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_f16))) svbool_t svcmpgt(svbool_t, 
svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s8))) svbool_t svcmpgt(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s32))) svbool_t svcmpgt(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s64))) svbool_t svcmpgt(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_s16))) svbool_t svcmpgt(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u8))) svbool_t svcmpgt(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u32))) svbool_t svcmpgt(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u64))) svbool_t svcmpgt(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_n_u16))) svbool_t svcmpgt(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s8))) svbool_t svcmpgt(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s32))) svbool_t svcmpgt(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s64))) svbool_t svcmpgt(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_s16))) svbool_t svcmpgt(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f64))) svbool_t svcmpgt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f32))) svbool_t svcmpgt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_f16))) svbool_t svcmpgt(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u8))) svbool_t svcmpgt(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u32))) svbool_t svcmpgt(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u64))) svbool_t svcmpgt(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_u16))) svbool_t svcmpgt(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s8))) svbool_t svcmpgt_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s32))) svbool_t svcmpgt_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_s16))) svbool_t svcmpgt_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u8))) svbool_t svcmpgt_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u32))) svbool_t svcmpgt_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_n_u16))) svbool_t svcmpgt_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s8))) svbool_t svcmpgt_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s32))) svbool_t svcmpgt_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_s16))) svbool_t svcmpgt_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u8))) svbool_t svcmpgt_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u32))) svbool_t svcmpgt_wide(svbool_t, svuint32_t, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpgt_wide_u16))) svbool_t svcmpgt_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f64))) svbool_t svcmple(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f32))) svbool_t svcmple(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_f16))) svbool_t svcmple(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s8))) svbool_t svcmple(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s32))) svbool_t svcmple(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s64))) svbool_t svcmple(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_s16))) svbool_t svcmple(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u8))) svbool_t svcmple(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u32))) svbool_t svcmple(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u64))) svbool_t svcmple(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_n_u16))) svbool_t svcmple(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s8))) svbool_t svcmple(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s32))) svbool_t svcmple(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s64))) svbool_t svcmple(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_s16))) svbool_t svcmple(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f64))) svbool_t svcmple(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f32))) svbool_t svcmple(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_f16))) svbool_t svcmple(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u8))) svbool_t svcmple(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u32))) svbool_t svcmple(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u64))) svbool_t svcmple(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_u16))) svbool_t svcmple(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s8))) svbool_t svcmple_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s32))) svbool_t svcmple_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_s16))) svbool_t svcmple_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u8))) svbool_t svcmple_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u32))) svbool_t svcmple_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_n_u16))) svbool_t svcmple_wide(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s8))) svbool_t svcmple_wide(svbool_t, svint8_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s32))) svbool_t svcmple_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_s16))) svbool_t svcmple_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u8))) svbool_t svcmple_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u32))) svbool_t svcmple_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmple_wide_u16))) svbool_t svcmple_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u8))) svbool_t svcmplt(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u32))) svbool_t svcmplt(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u64))) svbool_t svcmplt(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_u16))) svbool_t svcmplt(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f64))) svbool_t svcmplt(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f32))) svbool_t svcmplt(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_f16))) svbool_t svcmplt(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s8))) svbool_t svcmplt(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s32))) svbool_t svcmplt(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s64))) svbool_t svcmplt(svbool_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_n_s16))) svbool_t svcmplt(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u8))) svbool_t svcmplt(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u32))) svbool_t svcmplt(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u64))) svbool_t svcmplt(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_u16))) svbool_t svcmplt(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s8))) svbool_t svcmplt(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s32))) svbool_t svcmplt(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s64))) svbool_t svcmplt(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_s16))) svbool_t svcmplt(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f64))) svbool_t svcmplt(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f32))) svbool_t svcmplt(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_f16))) svbool_t svcmplt(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u8))) svbool_t svcmplt_wide(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u32))) svbool_t svcmplt_wide(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_u16))) svbool_t svcmplt_wide(svbool_t, svuint16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s8))) svbool_t svcmplt_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s32))) svbool_t svcmplt_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_n_s16))) svbool_t svcmplt_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u8))) svbool_t svcmplt_wide(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u32))) svbool_t svcmplt_wide(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_u16))) svbool_t svcmplt_wide(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s8))) svbool_t svcmplt_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s32))) svbool_t svcmplt_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmplt_wide_s16))) svbool_t svcmplt_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f64))) svbool_t svcmpne(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f32))) svbool_t svcmpne(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_f16))) svbool_t svcmpne(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u8))) svbool_t svcmpne(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u32))) svbool_t svcmpne(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u64))) svbool_t svcmpne(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_u16))) svbool_t svcmpne(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s8))) svbool_t svcmpne(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s32))) svbool_t svcmpne(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s64))) svbool_t svcmpne(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_n_s16))) svbool_t svcmpne(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u8))) svbool_t svcmpne(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u32))) svbool_t svcmpne(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u64))) svbool_t svcmpne(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_u16))) svbool_t svcmpne(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s8))) svbool_t svcmpne(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s32))) svbool_t svcmpne(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s64))) svbool_t svcmpne(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_s16))) svbool_t svcmpne(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f64))) svbool_t svcmpne(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f32))) svbool_t svcmpne(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_f16))) svbool_t svcmpne(svbool_t, 
svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s8))) svbool_t svcmpne_wide(svbool_t, svint8_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s32))) svbool_t svcmpne_wide(svbool_t, svint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_n_s16))) svbool_t svcmpne_wide(svbool_t, svint16_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s8))) svbool_t svcmpne_wide(svbool_t, svint8_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s32))) svbool_t svcmpne_wide(svbool_t, svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpne_wide_s16))) svbool_t svcmpne_wide(svbool_t, svint16_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f64))) svbool_t svcmpuo(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f32))) svbool_t svcmpuo(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_n_f16))) svbool_t svcmpuo(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f64))) svbool_t svcmpuo(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f32))) svbool_t svcmpuo(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmpuo_f16))) svbool_t svcmpuo(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_m))) svuint8_t svcnot_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_m))) svuint32_t svcnot_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_m))) svuint64_t svcnot_m(svuint64_t, svbool_t, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_m))) svuint16_t svcnot_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_m))) svint8_t svcnot_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_m))) svint32_t svcnot_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_m))) svint64_t svcnot_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_m))) svint16_t svcnot_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_x))) svuint8_t svcnot_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_x))) svuint32_t svcnot_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_x))) svuint64_t svcnot_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_x))) svuint16_t svcnot_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_x))) svint8_t svcnot_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_x))) svint32_t svcnot_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_x))) svint64_t svcnot_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_x))) svint16_t svcnot_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u8_z))) svuint8_t svcnot_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u32_z))) svuint32_t svcnot_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u64_z))) svuint64_t svcnot_z(svbool_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_u16_z))) svuint16_t svcnot_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s8_z))) svint8_t svcnot_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s32_z))) svint32_t svcnot_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s64_z))) svint64_t svcnot_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnot_s16_z))) svint16_t svcnot_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_m))) svuint8_t svcnt_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_m))) svuint32_t svcnt_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_m))) svuint8_t svcnt_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_m))) svuint32_t svcnt_m(svuint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_m))) svuint32_t svcnt_m(svuint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, 
svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_x))) svuint8_t svcnt_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_x))) svuint32_t svcnt_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_x))) svuint64_t svcnt_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_x))) svuint16_t svcnt_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_x))) svuint8_t svcnt_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_x))) svuint64_t svcnt_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_x))) svuint32_t svcnt_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_x))) svuint16_t svcnt_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_x))) svuint32_t svcnt_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_x))) svuint64_t svcnt_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_x))) svuint16_t svcnt_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u8_z))) svuint8_t svcnt_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u32_z))) svuint32_t svcnt_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_z))) svuint64_t svcnt_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_z))) svuint16_t svcnt_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_z))) svuint8_t svcnt_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_z))) svuint64_t svcnt_z(svbool_t, 
svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f32_z))) svuint32_t svcnt_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f16_z))) svuint16_t svcnt_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s32_z))) svuint32_t svcnt_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s64_z))) svuint64_t svcnt_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s16_z))) svuint16_t svcnt_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32))) svuint32_t svcompact(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64))) svuint64_t svcompact(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64))) svfloat64_t svcompact(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32))) svfloat32_t svcompact(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) svint32_t svcompact(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) svint64_t svcompact(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u8))) svuint8x2_t svcreate2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u32))) svuint32x2_t svcreate2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u64))) svuint64x2_t svcreate2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u16))) svuint16x2_t svcreate2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s8))) svint8x2_t svcreate2(svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f64))) svfloat64x2_t svcreate2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f32))) svfloat32x2_t svcreate2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f16))) svfloat16x2_t svcreate2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s32))) svint32x2_t svcreate2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s64))) svint64x2_t svcreate2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s16))) svint16x2_t svcreate2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u8))) svuint8x3_t svcreate3(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u32))) svuint32x3_t svcreate3(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u64))) svuint64x3_t svcreate3(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u16))) svuint16x3_t svcreate3(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s8))) svint8x3_t svcreate3(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f64))) svfloat64x3_t svcreate3(svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f32))) svfloat32x3_t svcreate3(svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f16))) svfloat16x3_t svcreate3(svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s32))) svint32x3_t svcreate3(svint32_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s64))) svint64x3_t svcreate3(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s16))) svint16x3_t svcreate3(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u8))) svuint8x4_t svcreate4(svuint8_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u32))) svuint32x4_t svcreate4(svuint32_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u64))) svuint64x4_t svcreate4(svuint64_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u16))) svuint16x4_t svcreate4(svuint16_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s8))) svint8x4_t svcreate4(svint8_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f64))) svfloat64x4_t svcreate4(svfloat64_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f32))) svfloat32x4_t svcreate4(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f16))) svfloat16x4_t svcreate4(svfloat16_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s32))) svint32x4_t svcreate4(svint32_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s64))) svint64x4_t svcreate4(svint64_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s16))) svint16x4_t svcreate4(svint16_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_m))) svfloat16_t 
svcvt_f16_m(svfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_x))) svfloat16_t svcvt_f16_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_z))) svfloat16_t svcvt_f16_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_x))) svfloat16_t svcvt_f16_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f64_z))) svfloat16_t svcvt_f16_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_x))) svfloat16_t svcvt_f16_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s16_z))) svfloat16_t svcvt_f16_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_x))) svfloat16_t svcvt_f16_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s32_z))) svfloat16_t svcvt_f16_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_x))) svfloat16_t svcvt_f16_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_s64_z))) svfloat16_t svcvt_f16_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_m))) svfloat16_t svcvt_f16_m(svfloat16_t, 
svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_x))) svfloat16_t svcvt_f16_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u16_z))) svfloat16_t svcvt_f16_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_x))) svfloat16_t svcvt_f16_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u32_z))) svfloat16_t svcvt_f16_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_m))) svfloat16_t svcvt_f16_m(svfloat16_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_x))) svfloat16_t svcvt_f16_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_u64_z))) svfloat16_t svcvt_f16_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_x))) svfloat32_t svcvt_f32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f16_z))) svfloat32_t svcvt_f32_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_x))) svfloat32_t svcvt_f32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_f64_z))) svfloat32_t svcvt_f32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svint32_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_x))) svfloat32_t svcvt_f32_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s32_z))) svfloat32_t svcvt_f32_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_x))) svfloat32_t svcvt_f32_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_s64_z))) svfloat32_t svcvt_f32_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_x))) svfloat32_t svcvt_f32_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u32_z))) svfloat32_t svcvt_f32_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_m))) svfloat32_t svcvt_f32_m(svfloat32_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_x))) svfloat32_t svcvt_f32_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f32_u64_z))) svfloat32_t svcvt_f32_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_x))) svfloat64_t svcvt_f64_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f16_z))) svfloat64_t svcvt_f64_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_x))) svfloat64_t svcvt_f64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_f32_z))) svfloat64_t svcvt_f64_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_x))) svfloat64_t svcvt_f64_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s32_z))) svfloat64_t svcvt_f64_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_x))) svfloat64_t svcvt_f64_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_s64_z))) svfloat64_t svcvt_f64_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_x))) svfloat64_t svcvt_f64_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u32_z))) svfloat64_t svcvt_f64_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_m))) svfloat64_t svcvt_f64_m(svfloat64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_x))) svfloat64_t svcvt_f64_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f64_u64_z))) svfloat64_t svcvt_f64_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_m))) svint16_t svcvt_s16_m(svint16_t, svbool_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_x))) svint16_t svcvt_s16_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s16_f16_z))) svint16_t svcvt_s16_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_m))) svint32_t svcvt_s32_m(svint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_x))) svint32_t svcvt_s32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f16_z))) svint32_t svcvt_s32_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_m))) svint32_t svcvt_s32_m(svint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_x))) svint32_t svcvt_s32_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f32_z))) svint32_t svcvt_s32_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_m))) svint32_t svcvt_s32_m(svint32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_x))) svint32_t svcvt_s32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s32_f64_z))) svint32_t svcvt_s32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_m))) svint64_t svcvt_s64_m(svint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_x))) svint64_t svcvt_s64_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f16_z))) svint64_t svcvt_s64_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_m))) svint64_t svcvt_s64_m(svint64_t, svbool_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_x))) svint64_t svcvt_s64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f32_z))) svint64_t svcvt_s64_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_m))) svint64_t svcvt_s64_m(svint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_x))) svint64_t svcvt_s64_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_s64_f64_z))) svint64_t svcvt_s64_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_m))) svuint16_t svcvt_u16_m(svuint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_x))) svuint16_t svcvt_u16_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u16_f16_z))) svuint16_t svcvt_u16_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_m))) svuint32_t svcvt_u32_m(svuint32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_x))) svuint32_t svcvt_u32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f16_z))) svuint32_t svcvt_u32_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_m))) svuint32_t svcvt_u32_m(svuint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_x))) svuint32_t svcvt_u32_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f32_z))) svuint32_t svcvt_u32_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_m))) svuint32_t svcvt_u32_m(svuint32_t, svbool_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_x))) svuint32_t svcvt_u32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u32_f64_z))) svuint32_t svcvt_u32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_m))) svuint64_t svcvt_u64_m(svuint64_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_x))) svuint64_t svcvt_u64_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f16_z))) svuint64_t svcvt_u64_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_m))) svuint64_t svcvt_u64_m(svuint64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_x))) svuint64_t svcvt_u64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f32_z))) svuint64_t svcvt_u64_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_m))) svuint64_t svcvt_u64_m(svuint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_x))) svuint64_t svcvt_u64_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_u64_f64_z))) svuint64_t svcvt_u64_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_m))) svfloat64_t svdiv_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_m))) svfloat32_t svdiv_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_m))) svfloat16_t svdiv_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_x))) svfloat64_t svdiv_x(svbool_t, svfloat64_t, float64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_x))) svfloat32_t svdiv_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_x))) svfloat16_t svdiv_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f64_z))) svfloat64_t svdiv_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f32_z))) svfloat32_t svdiv_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_f16_z))) svfloat16_t svdiv_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_m))) svint32_t svdiv_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_m))) svint64_t svdiv_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_x))) svint32_t svdiv_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_x))) svint64_t svdiv_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s32_z))) svint32_t svdiv_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_s64_z))) svint64_t svdiv_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_m))) svuint32_t svdiv_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_m))) svuint64_t svdiv_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_x))) svuint32_t svdiv_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_x))) svuint64_t svdiv_x(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u32_z))) svuint32_t svdiv_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_n_u64_z))) svuint64_t svdiv_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_m))) svfloat64_t svdiv_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_m))) svfloat32_t svdiv_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_m))) svfloat16_t svdiv_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_x))) svfloat64_t svdiv_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_x))) svfloat32_t svdiv_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_x))) svfloat16_t svdiv_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f64_z))) svfloat64_t svdiv_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f32_z))) svfloat32_t svdiv_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_f16_z))) svfloat16_t svdiv_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_m))) svint32_t svdiv_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_m))) svint64_t svdiv_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_x))) svint32_t svdiv_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_x))) svint64_t svdiv_x(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s32_z))) svint32_t svdiv_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_s64_z))) svint64_t svdiv_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_m))) svuint32_t svdiv_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_m))) svuint64_t svdiv_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_x))) svuint32_t svdiv_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_x))) svuint64_t svdiv_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u32_z))) svuint32_t svdiv_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdiv_u64_z))) svuint64_t svdiv_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_m))) svfloat64_t svdivr_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_m))) svfloat32_t svdivr_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_m))) svfloat16_t svdivr_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_x))) svfloat64_t svdivr_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_x))) svfloat32_t svdivr_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_x))) svfloat16_t svdivr_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f64_z))) svfloat64_t svdivr_z(svbool_t, svfloat64_t, float64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f32_z))) svfloat32_t svdivr_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_f16_z))) svfloat16_t svdivr_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_m))) svint32_t svdivr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_m))) svint64_t svdivr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_x))) svint32_t svdivr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_x))) svint64_t svdivr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s32_z))) svint32_t svdivr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_s64_z))) svint64_t svdivr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_m))) svuint32_t svdivr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_m))) svuint64_t svdivr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_x))) svuint32_t svdivr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_x))) svuint64_t svdivr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u32_z))) svuint32_t svdivr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_n_u64_z))) svuint64_t svdivr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_m))) svfloat64_t svdivr_m(svbool_t, svfloat64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_m))) svfloat32_t svdivr_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_m))) svfloat16_t svdivr_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_x))) svfloat64_t svdivr_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_x))) svfloat32_t svdivr_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_x))) svfloat16_t svdivr_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f64_z))) svfloat64_t svdivr_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f32_z))) svfloat32_t svdivr_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_f16_z))) svfloat16_t svdivr_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_m))) svint32_t svdivr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_m))) svint64_t svdivr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_x))) svint32_t svdivr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_x))) svint64_t svdivr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s32_z))) svint32_t svdivr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_s64_z))) svint64_t svdivr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_m))) svuint32_t svdivr_m(svbool_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_m))) svuint64_t svdivr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_x))) svuint32_t svdivr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_x))) svuint64_t svdivr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u32_z))) svuint32_t svdivr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdivr_u64_z))) svuint64_t svdivr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s32))) svint32_t svdot(svint32_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_s64))) svint64_t svdot(svint64_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u32))) svuint32_t svdot(svuint32_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_u64))) svuint64_t svdot(svuint64_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32))) svint32_t svdot(svint32_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s64))) svint64_t svdot(svint64_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32))) svuint32_t svdot(svuint32_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u64))) svuint64_t svdot(svuint64_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32))) svint32_t svdot_lane(svint32_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s64))) svint64_t svdot_lane(svint64_t, svint16_t, svint16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32))) svuint32_t svdot_lane(svuint32_t, svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u64))) svuint64_t svdot_lane(svuint64_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8))) svuint8_t svdup_u8(uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32))) svuint32_t svdup_u32(uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64))) svuint64_t svdup_u64(uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16))) svuint16_t svdup_u16(uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8))) svint8_t svdup_s8(int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64))) svfloat64_t svdup_f64(float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32))) svfloat32_t svdup_f32(float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16))) svfloat16_t svdup_f16(float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32))) svint32_t svdup_s32(int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64))) svint64_t svdup_s64(int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16))) svint16_t svdup_s16(int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_m))) svuint8_t svdup_u8_m(svuint8_t, svbool_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_m))) svuint32_t svdup_u32_m(svuint32_t, svbool_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_m))) svuint64_t svdup_u64_m(svuint64_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_m))) svuint16_t svdup_u16_m(svuint16_t, svbool_t, uint16_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_m))) svint8_t svdup_s8_m(svint8_t, svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_m))) svfloat64_t svdup_f64_m(svfloat64_t, svbool_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_m))) svfloat32_t svdup_f32_m(svfloat32_t, svbool_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_m))) svfloat16_t svdup_f16_m(svfloat16_t, svbool_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_m))) svint32_t svdup_s32_m(svint32_t, svbool_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_m))) svint64_t svdup_s64_m(svint64_t, svbool_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_m))) svint16_t svdup_s16_m(svint16_t, svbool_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b8))) svbool_t svdup_b8(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b32))) svbool_t svdup_b32(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b64))) svbool_t svdup_b64(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_b16))) svbool_t svdup_b16(bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_x))) svuint8_t svdup_u8_x(svbool_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_x))) svuint32_t svdup_u32_x(svbool_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_x))) svuint64_t svdup_u64_x(svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_x))) svuint16_t svdup_u16_x(svbool_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_x))) svint8_t svdup_s8_x(svbool_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_x))) svfloat64_t svdup_f64_x(svbool_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_x))) svfloat32_t svdup_f32_x(svbool_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_x))) svfloat16_t svdup_f16_x(svbool_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_x))) svint32_t svdup_s32_x(svbool_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_x))) svint64_t svdup_s64_x(svbool_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_x))) svint16_t svdup_s16_x(svbool_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u8_z))) svuint8_t svdup_u8_z(svbool_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u32_z))) svuint32_t svdup_u32_z(svbool_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_z))) svuint64_t svdup_u64_z(svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_z))) svuint16_t svdup_u16_z(svbool_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_z))) svint8_t svdup_s8_z(svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_z))) svfloat64_t svdup_f64_z(svbool_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f32_z))) svfloat32_t svdup_f32_z(svbool_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f16_z))) svfloat16_t svdup_f16_z(svbool_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s32_z))) svint32_t svdup_s32_z(svbool_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s64_z))) svint64_t svdup_s64_z(svbool_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s16_z))) svint16_t svdup_s16_z(svbool_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u8))) svuint8_t svdup_lane(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u32))) svuint32_t svdup_lane(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u64))) svuint64_t svdup_lane(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u16))) svuint16_t svdup_lane(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s8))) svint8_t svdup_lane(svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f64))) svfloat64_t svdup_lane(svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f32))) svfloat32_t svdup_lane(svfloat32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f16))) svfloat16_t svdup_lane(svfloat16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s32))) svint32_t svdup_lane(svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s64))) svint64_t svdup_lane(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s16))) svint16_t svdup_lane(svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u8))) svuint8_t svdupq_u8(uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s8))) svint8_t svdupq_s8(int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u16))) svuint16_t svdupq_u16(uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f16))) svfloat16_t svdupq_f16(float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s16))) svint16_t svdupq_s16(int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u32))) svuint32_t svdupq_u32(uint32_t, uint32_t, uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f32))) svfloat32_t svdupq_f32(float32_t, float32_t, float32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s32))) svint32_t svdupq_s32(int32_t, int32_t, int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u64))) svuint64_t svdupq_u64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f64))) svfloat64_t svdupq_f64(float64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s64))) svint64_t svdupq_s64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b8))) svbool_t svdupq_b8(bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b16))) svbool_t svdupq_b16(bool, bool, bool, bool, bool, bool, bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b32))) svbool_t svdupq_b32(bool, bool, bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_b64))) svbool_t svdupq_b64(bool, bool); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u8))) svuint8_t svdupq_lane(svuint8_t, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u32))) svuint32_t svdupq_lane(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u64))) svuint64_t svdupq_lane(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u16))) svuint16_t svdupq_lane(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s8))) svint8_t svdupq_lane(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f64))) svfloat64_t svdupq_lane(svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f32))) svfloat32_t svdupq_lane(svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f16))) svfloat16_t svdupq_lane(svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s32))) svint32_t svdupq_lane(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s64))) svint64_t svdupq_lane(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s16))) svint16_t svdupq_lane(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_b_z))) svbool_t sveor_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_m))) svuint8_t sveor_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_m))) svuint32_t sveor_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_m))) svuint64_t sveor_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_m))) svuint16_t sveor_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_m))) svint8_t 
sveor_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_m))) svint32_t sveor_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_m))) svint64_t sveor_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_m))) svint16_t sveor_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_x))) svuint8_t sveor_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_x))) svuint32_t sveor_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_x))) svuint64_t sveor_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_x))) svuint16_t sveor_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_x))) svint8_t sveor_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_x))) svint32_t sveor_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_x))) svint64_t sveor_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_x))) svint16_t sveor_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u8_z))) svuint8_t sveor_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u32_z))) svuint32_t sveor_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u64_z))) svuint64_t sveor_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_u16_z))) svuint16_t sveor_z(svbool_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s8_z))) svint8_t sveor_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s32_z))) svint32_t sveor_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s64_z))) svint64_t sveor_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_n_s16_z))) svint16_t sveor_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_m))) svuint8_t sveor_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_m))) svuint32_t sveor_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_m))) svuint64_t sveor_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_m))) svuint16_t sveor_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_m))) svint8_t sveor_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_m))) svint32_t sveor_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_m))) svint64_t sveor_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_m))) svint16_t sveor_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_x))) svuint8_t sveor_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_x))) svuint32_t sveor_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_x))) svuint64_t sveor_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_x))) svuint16_t 
sveor_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_x))) svint8_t sveor_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_x))) svint32_t sveor_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_x))) svint64_t sveor_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_x))) svint16_t sveor_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u8_z))) svuint8_t sveor_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u32_z))) svuint32_t sveor_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u64_z))) svuint64_t sveor_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_u16_z))) svuint16_t sveor_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s8_z))) svint8_t sveor_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s32_z))) svint32_t sveor_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s64_z))) svint64_t sveor_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor_s16_z))) svint16_t sveor_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u8))) uint8_t sveorv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u32))) uint32_t sveorv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u64))) uint64_t sveorv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_u16))) uint16_t 
sveorv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s8))) int8_t sveorv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s32))) int32_t sveorv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s64))) int64_t sveorv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorv_s16))) int16_t sveorv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) svfloat64_t svexpa(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) svfloat32_t svexpa(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) svfloat16_t svexpa(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u8))) svuint8_t svext(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u32))) svuint32_t svext(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u64))) svuint64_t svext(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u16))) svuint16_t svext(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s8))) svint8_t svext(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f64))) svfloat64_t svext(svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f32))) svfloat32_t svext(svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f16))) svfloat16_t svext(svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s32))) svint32_t svext(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s64))) 
svint64_t svext(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s16))) svint16_t svext(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_m))) svint32_t svextb_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_m))) svint64_t svextb_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_m))) svint16_t svextb_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_x))) svint32_t svextb_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_x))) svint64_t svextb_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_x))) svint16_t svextb_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s32_z))) svint32_t svextb_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s64_z))) svint64_t svextb_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_s16_z))) svint16_t svextb_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_m))) svuint32_t svextb_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_m))) svuint64_t svextb_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_m))) svuint16_t svextb_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_x))) svuint32_t svextb_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_x))) svuint64_t svextb_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_x))) svuint16_t 
svextb_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u32_z))) svuint32_t svextb_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u64_z))) svuint64_t svextb_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextb_u16_z))) svuint16_t svextb_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_m))) svint32_t svexth_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_m))) svint64_t svexth_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_x))) svint32_t svexth_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_x))) svint64_t svexth_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s32_z))) svint32_t svexth_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_s64_z))) svint64_t svexth_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_m))) svuint32_t svexth_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_m))) svuint64_t svexth_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_x))) svuint32_t svexth_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_x))) svuint64_t svexth_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u32_z))) svuint32_t svexth_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexth_u64_z))) svuint64_t svexth_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_m))) svint64_t svextw_m(svint64_t, svbool_t, svint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_x))) svint64_t svextw_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_s64_z))) svint64_t svextw_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_m))) svuint64_t svextw_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_x))) svuint64_t svextw_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextw_u64_z))) svuint64_t svextw_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u8))) svuint8_t svget2(svuint8x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u32))) svuint32_t svget2(svuint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u64))) svuint64_t svget2(svuint64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u16))) svuint16_t svget2(svuint16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s8))) svint8_t svget2(svint8x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f64))) svfloat64_t svget2(svfloat64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f32))) svfloat32_t svget2(svfloat32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f16))) svfloat16_t svget2(svfloat16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s32))) svint32_t svget2(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s64))) svint64_t svget2(svint64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s16))) svint16_t svget2(svint16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u8))) svuint8_t 
svget3(svuint8x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u32))) svuint32_t svget3(svuint32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u64))) svuint64_t svget3(svuint64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u16))) svuint16_t svget3(svuint16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s8))) svint8_t svget3(svint8x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f64))) svfloat64_t svget3(svfloat64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f32))) svfloat32_t svget3(svfloat32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f16))) svfloat16_t svget3(svfloat16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s32))) svint32_t svget3(svint32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s64))) svint64_t svget3(svint64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s16))) svint16_t svget3(svint16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u8))) svuint8_t svget4(svuint8x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u32))) svuint32_t svget4(svuint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u64))) svuint64_t svget4(svuint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u16))) svuint16_t svget4(svuint16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s8))) svint8_t svget4(svint8x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f64))) svfloat64_t svget4(svfloat64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f32))) svfloat32_t 
svget4(svfloat32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f16))) svfloat16_t svget4(svfloat16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s32))) svint32_t svget4(svint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s64))) svint64_t svget4(svint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s16))) svint16_t svget4(svint16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u8))) svuint8_t svinsr(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u32))) svuint32_t svinsr(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u64))) svuint64_t svinsr(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u16))) svuint16_t svinsr(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s8))) svint8_t svinsr(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f64))) svfloat64_t svinsr(svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f32))) svfloat32_t svinsr(svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f16))) svfloat16_t svinsr(svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s32))) svint32_t svinsr(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s64))) svint64_t svinsr(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s16))) svint16_t svinsr(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u8))) uint8_t svlasta(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u32))) uint32_t 
svlasta(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u64))) uint64_t svlasta(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u16))) uint16_t svlasta(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s8))) int8_t svlasta(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f64))) float64_t svlasta(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f32))) float32_t svlasta(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f16))) float16_t svlasta(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s32))) int32_t svlasta(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s64))) int64_t svlasta(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s16))) int16_t svlasta(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u8))) uint8_t svlastb(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u32))) uint32_t svlastb(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u64))) uint64_t svlastb(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u16))) uint16_t svlastb(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s8))) int8_t svlastb(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f64))) float64_t svlastb(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f32))) float32_t svlastb(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f16))) float16_t svlastb(svbool_t, svfloat16_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s32))) int32_t svlastb(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s64))) int64_t svlastb(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s16))) int16_t svlastb(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8))) svuint8_t svld1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32))) svuint32_t svld1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64))) svuint64_t svld1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16))) svuint16_t svld1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8))) svint8_t svld1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64))) svfloat64_t svld1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32))) svfloat32_t svld1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16))) svfloat16_t svld1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32))) svint32_t svld1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64))) svint64_t svld1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16))) svint16_t svld1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) svuint64_t svld1_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_f64))) svfloat64_t svld1_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_f32))) svfloat32_t svld1_gather_index_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_s32))) svint32_t svld1_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_s64))) svint64_t svld1_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_u32))) svuint32_t svld1_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_u64))) svuint64_t svld1_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_f64))) svfloat64_t svld1_gather_offset_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_f32))) svfloat32_t svld1_gather_offset_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_offset_s32))) svint32_t svld1_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_offset_s64))) svint64_t svld1_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_u32))) svuint32_t svld1_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_u64))) svuint64_t svld1_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_f64))) svfloat64_t 
svld1_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_f32))) svfloat32_t svld1_gather_f32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_s32))) svint32_t svld1_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_s64))) svint64_t svld1_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_u32))) svuint32_t svld1_gather_index(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_f32))) svfloat32_t svld1_gather_index(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32index_s32))) svint32_t svld1_gather_index(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_u32))) svuint32_t svld1_gather_index(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_f32))) svfloat32_t svld1_gather_index(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32index_s32))) svint32_t svld1_gather_index(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_u64))) svuint64_t svld1_gather_index(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_f64))) svfloat64_t svld1_gather_index(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64index_s64))) svint64_t svld1_gather_index(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_u64))) 
svuint64_t svld1_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_f64))) svfloat64_t svld1_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64index_s64))) svint64_t svld1_gather_index(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_u32))) svuint32_t svld1_gather_offset(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_f32))) svfloat32_t svld1_gather_offset(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s32offset_s32))) svint32_t svld1_gather_offset(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_u32))) svuint32_t svld1_gather_offset(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_f32))) svfloat32_t svld1_gather_offset(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32offset_s32))) svint32_t svld1_gather_offset(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_u64))) svuint64_t svld1_gather_offset(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_f64))) svfloat64_t svld1_gather_offset(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_s64offset_s64))) svint64_t svld1_gather_offset(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_u64))) svuint64_t svld1_gather_offset(svbool_t, uint64_t 
const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_f64))) svfloat64_t svld1_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64offset_s64))) svint64_t svld1_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8))) svuint8_t svld1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32))) svuint32_t svld1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64))) svuint64_t svld1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16))) svuint16_t svld1_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8))) svint8_t svld1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64))) svfloat64_t svld1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32))) svfloat32_t svld1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16))) svfloat16_t svld1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32))) svint32_t svld1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64))) svint64_t svld1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16))) svint16_t svld1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u8))) svuint8_t svld1rq(svbool_t, uint8_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u32))) svuint32_t svld1rq(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u64))) svuint64_t svld1rq(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u16))) svuint16_t svld1rq(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s8))) svint8_t svld1rq(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f64))) svfloat64_t svld1rq(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f32))) svfloat32_t svld1rq(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f16))) svfloat16_t svld1rq(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s32))) svint32_t svld1rq(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s64))) svint64_t svld1rq(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s16))) svint16_t svld1rq(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_u32))) svuint32_t svld1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_u64))) svuint64_t svld1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_offset_s32))) svint32_t svld1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_offset_s64))) svint64_t svld1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_u32))) svuint32_t 
svld1sb_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_u64))) svuint64_t svld1sb_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32base_s32))) svint32_t svld1sb_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64base_s64))) svint64_t svld1sb_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_u32))) svuint32_t svld1sb_gather_offset_u32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s32offset_s32))) svint32_t svld1sb_gather_offset_s32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_u32))) svuint32_t svld1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u32offset_s32))) svint32_t svld1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_u64))) svuint64_t svld1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_s64offset_s64))) svint64_t svld1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_u64))) svuint64_t svld1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_gather_u64offset_s64))) svint64_t svld1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_u32))) svuint32_t svld1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_u64))) svuint64_t svld1sh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_index_s32))) svint32_t svld1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_index_s64))) svint64_t svld1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_u32))) svuint32_t svld1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_u64))) svuint64_t svld1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_offset_s32))) svint32_t svld1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_offset_s64))) svint64_t svld1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_u32))) svuint32_t svld1sh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_u64))) svuint64_t svld1sh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32base_s32))) svint32_t svld1sh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64base_s64))) svint64_t svld1sh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_u32))) svuint32_t svld1sh_gather_index_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32index_s32))) svint32_t 
svld1sh_gather_index_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_u32))) svuint32_t svld1sh_gather_index_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32index_s32))) svint32_t svld1sh_gather_index_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_u64))) svuint64_t svld1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64index_s64))) svint64_t svld1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_u64))) svuint64_t svld1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64index_s64))) svint64_t svld1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_u32))) svuint32_t svld1sh_gather_offset_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s32offset_s32))) svint32_t svld1sh_gather_offset_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_u32))) svuint32_t svld1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u32offset_s32))) svint32_t svld1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_u64))) svuint64_t svld1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_s64offset_s64))) svint64_t svld1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_u64))) svuint64_t svld1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sh_gather_u64offset_s64))) svint64_t svld1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_u64))) svuint64_t svld1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_index_s64))) svint64_t svld1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_u64))) svuint64_t svld1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_offset_s64))) svint64_t svld1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_u64))) svuint64_t svld1sw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64base_s64))) svint64_t svld1sw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_u64))) svuint64_t svld1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64index_s64))) svint64_t svld1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_u64))) svuint64_t svld1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64index_s64))) svint64_t svld1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_u64))) svuint64_t svld1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_s64offset_s64))) svint64_t svld1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_u64))) svuint64_t svld1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sw_gather_u64offset_s64))) svint64_t svld1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_u32))) svuint32_t svld1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_u64))) svuint64_t svld1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_offset_s32))) svint32_t svld1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_offset_s64))) svint64_t svld1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_u32))) svuint32_t svld1ub_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_u64))) svuint64_t svld1ub_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32base_s32))) svint32_t svld1ub_gather_s32(svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64base_s64))) svint64_t svld1ub_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_u32))) svuint32_t svld1ub_gather_offset_u32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s32offset_s32))) svint32_t svld1ub_gather_offset_s32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_u32))) svuint32_t svld1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u32offset_s32))) svint32_t svld1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_u64))) svuint64_t svld1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_s64offset_s64))) svint64_t svld1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_u64))) svuint64_t svld1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ub_gather_u64offset_s64))) svint64_t svld1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_u32))) svuint32_t svld1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_u64))) svuint64_t svld1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_index_s32))) svint32_t svld1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_index_s64))) svint64_t svld1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_u32))) svuint32_t svld1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_u64))) svuint64_t svld1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_offset_s32))) svint32_t svld1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_offset_s64))) svint64_t svld1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_u32))) svuint32_t svld1uh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_u64))) svuint64_t svld1uh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32base_s32))) svint32_t svld1uh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64base_s64))) svint64_t svld1uh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_u32))) svuint32_t svld1uh_gather_index_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32index_s32))) svint32_t svld1uh_gather_index_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_u32))) svuint32_t svld1uh_gather_index_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32index_s32))) 
svint32_t svld1uh_gather_index_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_u64))) svuint64_t svld1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64index_s64))) svint64_t svld1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_u64))) svuint64_t svld1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64index_s64))) svint64_t svld1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_u32))) svuint32_t svld1uh_gather_offset_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s32offset_s32))) svint32_t svld1uh_gather_offset_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_u32))) svuint32_t svld1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u32offset_s32))) svint32_t svld1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_u64))) svuint64_t svld1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_s64offset_s64))) svint64_t svld1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_u64))) svuint64_t svld1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uh_gather_u64offset_s64))) svint64_t svld1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_u64))) svuint64_t svld1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_index_s64))) svint64_t svld1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_u64))) svuint64_t svld1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_offset_s64))) svint64_t svld1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_u64))) svuint64_t svld1uw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64base_s64))) svint64_t svld1uw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_u64))) svuint64_t svld1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64index_s64))) svint64_t svld1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_u64))) svuint64_t svld1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64index_s64))) svint64_t svld1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_u64))) svuint64_t svld1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_s64offset_s64))) svint64_t svld1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_u64))) svuint64_t svld1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uw_gather_u64offset_s64))) svint64_t svld1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u8))) svuint8x2_t svld2(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u32))) svuint32x2_t svld2(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u64))) svuint64x2_t svld2(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u16))) svuint16x2_t svld2(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s8))) svint8x2_t svld2(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f64))) svfloat64x2_t svld2(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f32))) svfloat32x2_t svld2(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f16))) svfloat16x2_t svld2(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s32))) svint32x2_t svld2(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s64))) svint64x2_t svld2(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s16))) svint16x2_t svld2(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u8))) svuint8x2_t svld2_vnum(svbool_t, uint8_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u32))) svuint32x2_t svld2_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u64))) svuint64x2_t svld2_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u16))) svuint16x2_t svld2_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s8))) svint8x2_t svld2_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f64))) svfloat64x2_t svld2_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f32))) svfloat32x2_t svld2_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f16))) svfloat16x2_t svld2_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s32))) svint32x2_t svld2_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s64))) svint64x2_t svld2_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s16))) svint16x2_t svld2_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u8))) svuint8x3_t svld3(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u32))) svuint32x3_t svld3(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u64))) svuint64x3_t svld3(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u16))) svuint16x3_t svld3(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s8))) svint8x3_t svld3(svbool_t, int8_t const 
*); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f64))) svfloat64x3_t svld3(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f32))) svfloat32x3_t svld3(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f16))) svfloat16x3_t svld3(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s32))) svint32x3_t svld3(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s64))) svint64x3_t svld3(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s16))) svint16x3_t svld3(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u8))) svuint8x3_t svld3_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u32))) svuint32x3_t svld3_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u64))) svuint64x3_t svld3_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u16))) svuint16x3_t svld3_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s8))) svint8x3_t svld3_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f64))) svfloat64x3_t svld3_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f32))) svfloat32x3_t svld3_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f16))) svfloat16x3_t svld3_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s32))) svint32x3_t svld3_vnum(svbool_t, int32_t const *, 
int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s64))) svint64x3_t svld3_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s16))) svint16x3_t svld3_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u8))) svuint8x4_t svld4(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u32))) svuint32x4_t svld4(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u64))) svuint64x4_t svld4(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u16))) svuint16x4_t svld4(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s8))) svint8x4_t svld4(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f64))) svfloat64x4_t svld4(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f32))) svfloat32x4_t svld4(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f16))) svfloat16x4_t svld4(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s32))) svint32x4_t svld4(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s64))) svint64x4_t svld4(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s16))) svint16x4_t svld4(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u8))) svuint8x4_t svld4_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u32))) svuint32x4_t svld4_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u64))) svuint64x4_t 
svld4_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u16))) svuint16x4_t svld4_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s8))) svint8x4_t svld4_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f64))) svfloat64x4_t svld4_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f32))) svfloat32x4_t svld4_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f16))) svfloat16x4_t svld4_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s32))) svint32x4_t svld4_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s64))) svint64x4_t svld4_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s16))) svint16x4_t svld4_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u8))) svuint8_t svldff1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u32))) svuint32_t svldff1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u64))) svuint64_t svldff1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u16))) svuint16_t svldff1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s8))) svint8_t svldff1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f64))) svfloat64_t svldff1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f32))) svfloat32_t 
svldff1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f16))) svfloat16_t svldff1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s32))) svint32_t svldff1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s64))) svint64_t svldff1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s16))) svint16_t svldff1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_u32))) svuint32_t svldff1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_u64))) svuint64_t svldff1_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_f64))) svfloat64_t svldff1_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_f32))) svfloat32_t svldff1_gather_index_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_s32))) svint32_t svldff1_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_index_s64))) svint64_t svldff1_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_u32))) svuint32_t svldff1_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_u64))) svuint64_t svldff1_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_f64))) svfloat64_t svldff1_gather_offset_f64(svbool_t, 
svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_f32))) svfloat32_t svldff1_gather_offset_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_offset_s32))) svint32_t svldff1_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_offset_s64))) svint64_t svldff1_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_u32))) svuint32_t svldff1_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_u64))) svuint64_t svldff1_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_f64))) svfloat64_t svldff1_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_f32))) svfloat32_t svldff1_gather_f32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_s32))) svint32_t svldff1_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64base_s64))) svint64_t svldff1_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_u32))) svuint32_t svldff1_gather_index(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_f32))) svfloat32_t svldff1_gather_index(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32index_s32))) svint32_t svldff1_gather_index(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_u32))) svuint32_t 
svldff1_gather_index(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_f32))) svfloat32_t svldff1_gather_index(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32index_s32))) svint32_t svldff1_gather_index(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_u64))) svuint64_t svldff1_gather_index(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_f64))) svfloat64_t svldff1_gather_index(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64index_s64))) svint64_t svldff1_gather_index(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_u64))) svuint64_t svldff1_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_f64))) svfloat64_t svldff1_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64index_s64))) svint64_t svldff1_gather_index(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_u32))) svuint32_t svldff1_gather_offset(svbool_t, uint32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_f32))) svfloat32_t svldff1_gather_offset(svbool_t, float32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s32offset_s32))) svint32_t svldff1_gather_offset(svbool_t, int32_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_u32))) svuint32_t 
svldff1_gather_offset(svbool_t, uint32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_f32))) svfloat32_t svldff1_gather_offset(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32offset_s32))) svint32_t svldff1_gather_offset(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_u64))) svuint64_t svldff1_gather_offset(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_f64))) svfloat64_t svldff1_gather_offset(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_s64offset_s64))) svint64_t svldff1_gather_offset(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_u64))) svuint64_t svldff1_gather_offset(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_f64))) svfloat64_t svldff1_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u64offset_s64))) svint64_t svldff1_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u8))) svuint8_t svldff1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u32))) svuint32_t svldff1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u64))) svuint64_t svldff1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u16))) svuint16_t svldff1_vnum(svbool_t, uint16_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s8))) svint8_t svldff1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f64))) svfloat64_t svldff1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f32))) svfloat32_t svldff1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f16))) svfloat16_t svldff1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s32))) svint32_t svldff1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s64))) svint64_t svldff1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s16))) svint16_t svldff1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_u32))) svuint32_t svldff1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_u64))) svuint64_t svldff1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_s32))) svint32_t svldff1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_offset_s64))) svint64_t svldff1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_u32))) svuint32_t svldff1sb_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_u64))) svuint64_t svldff1sb_gather_u64(svbool_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_s32))) svint32_t svldff1sb_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64base_s64))) svint64_t svldff1sb_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_u32))) svuint32_t svldff1sb_gather_offset_u32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s32offset_s32))) svint32_t svldff1sb_gather_offset_s32(svbool_t, int8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_u32))) svuint32_t svldff1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32offset_s32))) svint32_t svldff1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_u64))) svuint64_t svldff1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_s64offset_s64))) svint64_t svldff1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_u64))) svuint64_t svldff1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u64offset_s64))) svint64_t svldff1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_u32))) svuint32_t svldff1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_u64))) svuint64_t svldff1sh_gather_index_u64(svbool_t, svuint64_t, 
int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_index_s32))) svint32_t svldff1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_index_s64))) svint64_t svldff1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_u32))) svuint32_t svldff1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_u64))) svuint64_t svldff1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_offset_s32))) svint32_t svldff1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_offset_s64))) svint64_t svldff1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_u32))) svuint32_t svldff1sh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_u64))) svuint64_t svldff1sh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32base_s32))) svint32_t svldff1sh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64base_s64))) svint64_t svldff1sh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_u32))) svuint32_t svldff1sh_gather_index_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32index_s32))) svint32_t svldff1sh_gather_index_s32(svbool_t, int16_t const *, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_u32))) svuint32_t svldff1sh_gather_index_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32index_s32))) svint32_t svldff1sh_gather_index_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_u64))) svuint64_t svldff1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64index_s64))) svint64_t svldff1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_u64))) svuint64_t svldff1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64index_s64))) svint64_t svldff1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_u32))) svuint32_t svldff1sh_gather_offset_u32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s32offset_s32))) svint32_t svldff1sh_gather_offset_s32(svbool_t, int16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_u32))) svuint32_t svldff1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u32offset_s32))) svint32_t svldff1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_u64))) svuint64_t svldff1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_s64offset_s64))) svint64_t 
svldff1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_u64))) svuint64_t svldff1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sh_gather_u64offset_s64))) svint64_t svldff1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_u64))) svuint64_t svldff1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_index_s64))) svint64_t svldff1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_u64))) svuint64_t svldff1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_offset_s64))) svint64_t svldff1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_u64))) svuint64_t svldff1sw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64base_s64))) svint64_t svldff1sw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_u64))) svuint64_t svldff1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64index_s64))) svint64_t svldff1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_u64))) svuint64_t svldff1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64index_s64))) 
svint64_t svldff1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_u64))) svuint64_t svldff1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_s64offset_s64))) svint64_t svldff1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_u64))) svuint64_t svldff1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sw_gather_u64offset_s64))) svint64_t svldff1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_u32))) svuint32_t svldff1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_u64))) svuint64_t svldff1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_offset_s32))) svint32_t svldff1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_offset_s64))) svint64_t svldff1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_u32))) svuint32_t svldff1ub_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_u64))) svuint64_t svldff1ub_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32base_s32))) svint32_t svldff1ub_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64base_s64))) svint64_t 
svldff1ub_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_u32))) svuint32_t svldff1ub_gather_offset_u32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s32offset_s32))) svint32_t svldff1ub_gather_offset_s32(svbool_t, uint8_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_u32))) svuint32_t svldff1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u32offset_s32))) svint32_t svldff1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_u64))) svuint64_t svldff1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_s64offset_s64))) svint64_t svldff1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_u64))) svuint64_t svldff1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1ub_gather_u64offset_s64))) svint64_t svldff1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_u32))) svuint32_t svldff1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_u64))) svuint64_t svldff1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_index_s32))) svint32_t svldff1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_index_s64))) svint64_t svldff1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_u32))) svuint32_t svldff1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_u64))) svuint64_t svldff1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_offset_s32))) svint32_t svldff1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_offset_s64))) svint64_t svldff1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_u32))) svuint32_t svldff1uh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_u64))) svuint64_t svldff1uh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32base_s32))) svint32_t svldff1uh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64base_s64))) svint64_t svldff1uh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_u32))) svuint32_t svldff1uh_gather_index_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32index_s32))) svint32_t svldff1uh_gather_index_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_u32))) svuint32_t svldff1uh_gather_index_u32(svbool_t, uint16_t const *, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32index_s32))) svint32_t svldff1uh_gather_index_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_u64))) svuint64_t svldff1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64index_s64))) svint64_t svldff1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_u64))) svuint64_t svldff1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64index_s64))) svint64_t svldff1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_u32))) svuint32_t svldff1uh_gather_offset_u32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s32offset_s32))) svint32_t svldff1uh_gather_offset_s32(svbool_t, uint16_t const *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_u32))) svuint32_t svldff1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u32offset_s32))) svint32_t svldff1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_u64))) svuint64_t svldff1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_s64offset_s64))) svint64_t svldff1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_u64))) 
svuint64_t svldff1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uh_gather_u64offset_s64))) svint64_t svldff1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_u64))) svuint64_t svldff1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_index_s64))) svint64_t svldff1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_u64))) svuint64_t svldff1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_offset_s64))) svint64_t svldff1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_u64))) svuint64_t svldff1uw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64base_s64))) svint64_t svldff1uw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_u64))) svuint64_t svldff1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64index_s64))) svint64_t svldff1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_u64))) svuint64_t svldff1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64index_s64))) svint64_t svldff1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_u64))) svuint64_t svldff1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_s64offset_s64))) svint64_t svldff1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_u64))) svuint64_t svldff1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1uw_gather_u64offset_s64))) svint64_t svldff1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u8))) svuint8_t svldnf1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u32))) svuint32_t svldnf1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u64))) svuint64_t svldnf1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u16))) svuint16_t svldnf1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s8))) svint8_t svldnf1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f64))) svfloat64_t svldnf1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f32))) svfloat32_t svldnf1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f16))) svfloat16_t svldnf1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s32))) svint32_t svldnf1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s64))) svint64_t svldnf1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s16))) 
svint16_t svldnf1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u8))) svuint8_t svldnf1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u32))) svuint32_t svldnf1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u64))) svuint64_t svldnf1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u16))) svuint16_t svldnf1_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s8))) svint8_t svldnf1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f64))) svfloat64_t svldnf1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f32))) svfloat32_t svldnf1_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f16))) svfloat16_t svldnf1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s32))) svint32_t svldnf1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s64))) svint64_t svldnf1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s16))) svint16_t svldnf1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8))) svuint8_t svldnt1(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32))) svuint32_t svldnt1(svbool_t, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64))) svuint64_t svldnt1(svbool_t, uint64_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16))) svuint16_t svldnt1(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8))) svint8_t svldnt1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64))) svfloat64_t svldnt1(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32))) svfloat32_t svldnt1(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16))) svfloat16_t svldnt1(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32))) svint32_t svldnt1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64))) svint64_t svldnt1(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16))) svint16_t svldnt1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8))) svuint8_t svldnt1_vnum(svbool_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32))) svuint32_t svldnt1_vnum(svbool_t, uint32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64))) svuint64_t svldnt1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16))) svuint16_t svldnt1_vnum(svbool_t, uint16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8))) svint8_t svldnt1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64))) svfloat64_t svldnt1_vnum(svbool_t, float64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32))) svfloat32_t svldnt1_vnum(svbool_t, float32_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16))) svfloat16_t svldnt1_vnum(svbool_t, float16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32))) svint32_t svldnt1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64))) svint64_t svldnt1_vnum(svbool_t, int64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16))) svint16_t svldnt1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u8))) uint64_t svlen(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u32))) uint64_t svlen(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u64))) uint64_t svlen(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u16))) uint64_t svlen(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s8))) uint64_t svlen(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f64))) uint64_t svlen(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f32))) uint64_t svlen(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f16))) uint64_t svlen(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s32))) uint64_t svlen(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s64))) uint64_t svlen(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s16))) uint64_t svlen(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_m))) svuint8_t svlsl_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_m))) svuint32_t svlsl_m(svbool_t, svuint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_m))) svuint64_t svlsl_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_m))) svuint16_t svlsl_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_m))) svint8_t svlsl_m(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_m))) svint32_t svlsl_m(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_m))) svint64_t svlsl_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_m))) svint16_t svlsl_m(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_x))) svuint8_t svlsl_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_x))) svuint32_t svlsl_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_x))) svuint64_t svlsl_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_x))) svuint16_t svlsl_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_x))) svint8_t svlsl_x(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_x))) svint32_t svlsl_x(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_x))) svint64_t svlsl_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_x))) svint16_t svlsl_x(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u8_z))) svuint8_t svlsl_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u32_z))) 
svuint32_t svlsl_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u64_z))) svuint64_t svlsl_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_u16_z))) svuint16_t svlsl_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s8_z))) svint8_t svlsl_z(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s32_z))) svint32_t svlsl_z(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s64_z))) svint64_t svlsl_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_n_s16_z))) svint16_t svlsl_z(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_m))) svuint8_t svlsl_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_m))) svuint32_t svlsl_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_m))) svuint64_t svlsl_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_m))) svuint16_t svlsl_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_m))) svint8_t svlsl_m(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_m))) svint32_t svlsl_m(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_m))) svint64_t svlsl_m(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_m))) svint16_t svlsl_m(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_x))) svuint8_t svlsl_x(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_x))) svuint32_t svlsl_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_x))) svuint64_t svlsl_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_x))) svuint16_t svlsl_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_x))) svint8_t svlsl_x(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_x))) svint32_t svlsl_x(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_x))) svint64_t svlsl_x(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_x))) svint16_t svlsl_x(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u8_z))) svuint8_t svlsl_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u32_z))) svuint32_t svlsl_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u64_z))) svuint64_t svlsl_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_u16_z))) svuint16_t svlsl_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s8_z))) svint8_t svlsl_z(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s32_z))) svint32_t svlsl_z(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s64_z))) svint64_t svlsl_z(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_s16_z))) svint16_t svlsl_z(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_m))) 
svuint8_t svlsl_wide_m(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_m))) svuint32_t svlsl_wide_m(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_m))) svuint16_t svlsl_wide_m(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_m))) svint8_t svlsl_wide_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_m))) svint32_t svlsl_wide_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_m))) svint16_t svlsl_wide_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_x))) svuint8_t svlsl_wide_x(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_x))) svuint32_t svlsl_wide_x(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_x))) svuint16_t svlsl_wide_x(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_x))) svint8_t svlsl_wide_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_x))) svint32_t svlsl_wide_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_x))) svint16_t svlsl_wide_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u8_z))) svuint8_t svlsl_wide_z(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u32_z))) svuint32_t svlsl_wide_z(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_u16_z))) svuint16_t svlsl_wide_z(svbool_t, svuint16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s8_z))) svint8_t svlsl_wide_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s32_z))) svint32_t svlsl_wide_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_n_s16_z))) svint16_t svlsl_wide_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_m))) svuint8_t svlsl_wide_m(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_m))) svuint32_t svlsl_wide_m(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_m))) svuint16_t svlsl_wide_m(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_m))) svint8_t svlsl_wide_m(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_m))) svint32_t svlsl_wide_m(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_m))) svint16_t svlsl_wide_m(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_x))) svuint8_t svlsl_wide_x(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_x))) svuint32_t svlsl_wide_x(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_x))) svuint16_t svlsl_wide_x(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_x))) svint8_t svlsl_wide_x(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_x))) svint32_t svlsl_wide_x(svbool_t, svint32_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_x))) svint16_t svlsl_wide_x(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u8_z))) svuint8_t svlsl_wide_z(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u32_z))) svuint32_t svlsl_wide_z(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_u16_z))) svuint16_t svlsl_wide_z(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s8_z))) svint8_t svlsl_wide_z(svbool_t, svint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s32_z))) svint32_t svlsl_wide_z(svbool_t, svint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsl_wide_s16_z))) svint16_t svlsl_wide_z(svbool_t, svint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_m))) svuint8_t svlsr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_m))) svuint32_t svlsr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_m))) svuint64_t svlsr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_m))) svuint16_t svlsr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_x))) svuint8_t svlsr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_x))) svuint32_t svlsr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_x))) svuint64_t svlsr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_x))) svuint16_t svlsr_x(svbool_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u8_z))) svuint8_t svlsr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u32_z))) svuint32_t svlsr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u64_z))) svuint64_t svlsr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_n_u16_z))) svuint16_t svlsr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_m))) svuint8_t svlsr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_m))) svuint32_t svlsr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_m))) svuint64_t svlsr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_m))) svuint16_t svlsr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_x))) svuint8_t svlsr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_x))) svuint32_t svlsr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_x))) svuint64_t svlsr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_x))) svuint16_t svlsr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u8_z))) svuint8_t svlsr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u32_z))) svuint32_t svlsr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u64_z))) svuint64_t svlsr_z(svbool_t, svuint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_u16_z))) svuint16_t svlsr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_m))) svuint8_t svlsr_wide_m(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_m))) svuint32_t svlsr_wide_m(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_m))) svuint16_t svlsr_wide_m(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_x))) svuint8_t svlsr_wide_x(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_x))) svuint32_t svlsr_wide_x(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_x))) svuint16_t svlsr_wide_x(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u8_z))) svuint8_t svlsr_wide_z(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u32_z))) svuint32_t svlsr_wide_z(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_n_u16_z))) svuint16_t svlsr_wide_z(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_m))) svuint8_t svlsr_wide_m(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_m))) svuint32_t svlsr_wide_m(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_m))) svuint16_t svlsr_wide_m(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_x))) svuint8_t svlsr_wide_x(svbool_t, svuint8_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_x))) svuint32_t svlsr_wide_x(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_x))) svuint16_t svlsr_wide_x(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u8_z))) svuint8_t svlsr_wide_z(svbool_t, svuint8_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u32_z))) svuint32_t svlsr_wide_z(svbool_t, svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlsr_wide_u16_z))) svuint16_t svlsr_wide_z(svbool_t, svuint16_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_m))) svfloat64_t svmad_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_m))) svfloat32_t svmad_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_m))) svfloat16_t svmad_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_x))) svfloat64_t svmad_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_x))) svfloat32_t svmad_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_x))) svfloat16_t svmad_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f64_z))) svfloat64_t svmad_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f32_z))) svfloat32_t svmad_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_f16_z))) svfloat16_t svmad_z(svbool_t, svfloat16_t, svfloat16_t, 
float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_m))) svuint8_t svmad_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_m))) svuint32_t svmad_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_m))) svuint64_t svmad_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_m))) svuint16_t svmad_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_m))) svint8_t svmad_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_m))) svint32_t svmad_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_m))) svint64_t svmad_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_m))) svint16_t svmad_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_x))) svuint8_t svmad_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_x))) svuint32_t svmad_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_x))) svuint64_t svmad_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_x))) svuint16_t svmad_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_x))) svint8_t svmad_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_x))) svint32_t svmad_x(svbool_t, svint32_t, svint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_x))) svint64_t svmad_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_x))) svint16_t svmad_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u8_z))) svuint8_t svmad_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u32_z))) svuint32_t svmad_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u64_z))) svuint64_t svmad_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_u16_z))) svuint16_t svmad_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s8_z))) svint8_t svmad_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s32_z))) svint32_t svmad_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s64_z))) svint64_t svmad_z(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_n_s16_z))) svint16_t svmad_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_m))) svfloat64_t svmad_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_m))) svfloat32_t svmad_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_m))) svfloat16_t svmad_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_x))) svfloat64_t svmad_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_x))) svfloat32_t svmad_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_x))) svfloat16_t svmad_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f64_z))) svfloat64_t svmad_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f32_z))) svfloat32_t svmad_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_f16_z))) svfloat16_t svmad_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_m))) svuint8_t svmad_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_m))) svuint32_t svmad_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_m))) svuint64_t svmad_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_m))) svuint16_t svmad_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_m))) svint8_t svmad_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_m))) svint32_t svmad_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_m))) svint64_t svmad_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_m))) svint16_t svmad_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_x))) svuint8_t svmad_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_x))) svuint32_t svmad_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_x))) svuint64_t svmad_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_x))) svuint16_t svmad_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_x))) svint8_t svmad_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_x))) svint32_t svmad_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_x))) svint64_t svmad_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_x))) svint16_t svmad_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u8_z))) svuint8_t svmad_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u32_z))) svuint32_t svmad_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u64_z))) svuint64_t svmad_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_u16_z))) svuint16_t svmad_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s8_z))) svint8_t svmad_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s32_z))) svint32_t svmad_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s64_z))) svint64_t svmad_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmad_s16_z))) svint16_t svmad_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_m))) svfloat64_t svmax_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_m))) svfloat32_t svmax_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_m))) svfloat16_t svmax_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_x))) svfloat64_t svmax_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_x))) svfloat32_t svmax_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_x))) svfloat16_t svmax_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f64_z))) svfloat64_t svmax_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f32_z))) svfloat32_t svmax_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_f16_z))) svfloat16_t svmax_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_m))) svint8_t svmax_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_m))) svint32_t svmax_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_m))) svint64_t svmax_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_m))) svint16_t svmax_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_x))) svint8_t svmax_x(svbool_t, svint8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_x))) svint32_t svmax_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_x))) svint64_t svmax_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_x))) svint16_t svmax_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s8_z))) svint8_t svmax_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s32_z))) svint32_t svmax_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s64_z))) svint64_t svmax_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_s16_z))) svint16_t svmax_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_m))) svuint8_t svmax_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_m))) svuint32_t svmax_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_m))) svuint64_t svmax_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_m))) svuint16_t svmax_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_x))) svuint8_t svmax_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_x))) svuint32_t svmax_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_x))) svuint64_t svmax_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_x))) svuint16_t svmax_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u8_z))) svuint8_t 
svmax_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u32_z))) svuint32_t svmax_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u64_z))) svuint64_t svmax_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_u16_z))) svuint16_t svmax_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_m))) svfloat64_t svmax_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_m))) svfloat32_t svmax_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_m))) svfloat16_t svmax_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_x))) svfloat64_t svmax_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_x))) svfloat32_t svmax_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_x))) svfloat16_t svmax_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f64_z))) svfloat64_t svmax_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f32_z))) svfloat32_t svmax_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_f16_z))) svfloat16_t svmax_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_m))) svint8_t svmax_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_m))) svint32_t svmax_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_m))) svint64_t svmax_m(svbool_t, svint64_t, 
svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_m))) svint16_t svmax_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_x))) svint8_t svmax_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_x))) svint32_t svmax_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_x))) svint64_t svmax_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_x))) svint16_t svmax_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s8_z))) svint8_t svmax_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s32_z))) svint32_t svmax_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s64_z))) svint64_t svmax_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_s16_z))) svint16_t svmax_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_m))) svuint8_t svmax_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_m))) svuint32_t svmax_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_m))) svuint64_t svmax_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_m))) svuint16_t svmax_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_x))) svuint8_t svmax_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_x))) svuint32_t svmax_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_x))) 
svuint64_t svmax_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_x))) svuint16_t svmax_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u8_z))) svuint8_t svmax_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u32_z))) svuint32_t svmax_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u64_z))) svuint64_t svmax_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_u16_z))) svuint16_t svmax_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_m))) svfloat64_t svmaxnm_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_m))) svfloat32_t svmaxnm_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_m))) svfloat16_t svmaxnm_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_x))) svfloat64_t svmaxnm_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_x))) svfloat32_t svmaxnm_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_x))) svfloat16_t svmaxnm_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f64_z))) svfloat64_t svmaxnm_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f32_z))) svfloat32_t svmaxnm_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_f16_z))) svfloat16_t svmaxnm_z(svbool_t, svfloat16_t, float16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_m))) svfloat64_t svmaxnm_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_m))) svfloat32_t svmaxnm_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_m))) svfloat16_t svmaxnm_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_x))) svfloat64_t svmaxnm_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_x))) svfloat32_t svmaxnm_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_x))) svfloat16_t svmaxnm_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f64_z))) svfloat64_t svmaxnm_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f32_z))) svfloat32_t svmaxnm_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_f16_z))) svfloat16_t svmaxnm_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f64))) float64_t svmaxnmv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f32))) float32_t svmaxnmv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmv_f16))) float16_t svmaxnmv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f64))) float64_t svmaxv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f32))) float32_t svmaxv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_f16))) float16_t svmaxv(svbool_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s8))) int8_t svmaxv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s32))) int32_t svmaxv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s64))) int64_t svmaxv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_s16))) int16_t svmaxv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u8))) uint8_t svmaxv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u32))) uint32_t svmaxv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u64))) uint64_t svmaxv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxv_u16))) uint16_t svmaxv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_m))) svfloat64_t svmin_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_m))) svfloat32_t svmin_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_m))) svfloat16_t svmin_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_x))) svfloat64_t svmin_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_x))) svfloat32_t svmin_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_x))) svfloat16_t svmin_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f64_z))) svfloat64_t svmin_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f32_z))) svfloat32_t svmin_z(svbool_t, svfloat32_t, float32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_f16_z))) svfloat16_t svmin_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_m))) svint8_t svmin_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_m))) svint32_t svmin_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_m))) svint64_t svmin_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_m))) svint16_t svmin_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_x))) svint8_t svmin_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_x))) svint32_t svmin_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_x))) svint64_t svmin_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_x))) svint16_t svmin_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s8_z))) svint8_t svmin_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s32_z))) svint32_t svmin_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s64_z))) svint64_t svmin_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_s16_z))) svint16_t svmin_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_m))) svuint8_t svmin_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_m))) svuint32_t svmin_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_m))) svuint64_t 
svmin_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_m))) svuint16_t svmin_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_x))) svuint8_t svmin_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_x))) svuint32_t svmin_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_x))) svuint64_t svmin_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_x))) svuint16_t svmin_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u8_z))) svuint8_t svmin_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u32_z))) svuint32_t svmin_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u64_z))) svuint64_t svmin_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_u16_z))) svuint16_t svmin_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_m))) svfloat64_t svmin_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_m))) svfloat32_t svmin_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_m))) svfloat16_t svmin_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_x))) svfloat64_t svmin_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_x))) svfloat32_t svmin_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_x))) svfloat16_t svmin_x(svbool_t, svfloat16_t, 
svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f64_z))) svfloat64_t svmin_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f32_z))) svfloat32_t svmin_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_f16_z))) svfloat16_t svmin_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_m))) svint8_t svmin_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_m))) svint32_t svmin_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_m))) svint64_t svmin_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_m))) svint16_t svmin_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_x))) svint8_t svmin_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_x))) svint32_t svmin_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_x))) svint64_t svmin_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_x))) svint16_t svmin_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s8_z))) svint8_t svmin_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s32_z))) svint32_t svmin_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s64_z))) svint64_t svmin_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_s16_z))) svint16_t svmin_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_m))) 
svuint8_t svmin_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_m))) svuint32_t svmin_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_m))) svuint64_t svmin_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_m))) svuint16_t svmin_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_x))) svuint8_t svmin_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_x))) svuint32_t svmin_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_x))) svuint64_t svmin_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_x))) svuint16_t svmin_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u8_z))) svuint8_t svmin_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u32_z))) svuint32_t svmin_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u64_z))) svuint64_t svmin_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_u16_z))) svuint16_t svmin_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_m))) svfloat64_t svminnm_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_m))) svfloat32_t svminnm_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_m))) svfloat16_t svminnm_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_x))) svfloat64_t svminnm_x(svbool_t, 
svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_x))) svfloat32_t svminnm_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_x))) svfloat16_t svminnm_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f64_z))) svfloat64_t svminnm_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f32_z))) svfloat32_t svminnm_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_f16_z))) svfloat16_t svminnm_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_m))) svfloat64_t svminnm_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_m))) svfloat32_t svminnm_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_m))) svfloat16_t svminnm_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_x))) svfloat64_t svminnm_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_x))) svfloat32_t svminnm_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_x))) svfloat16_t svminnm_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f64_z))) svfloat64_t svminnm_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f32_z))) svfloat32_t svminnm_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_f16_z))) svfloat16_t svminnm_z(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f64))) float64_t svminnmv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f32))) float32_t svminnmv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmv_f16))) float16_t svminnmv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f64))) float64_t svminv(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f32))) float32_t svminv(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_f16))) float16_t svminv(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s8))) int8_t svminv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s32))) int32_t svminv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s64))) int64_t svminv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_s16))) int16_t svminv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u8))) uint8_t svminv(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u32))) uint32_t svminv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u64))) uint64_t svminv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminv_u16))) uint16_t svminv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_m))) svfloat64_t svmla_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_m))) svfloat32_t svmla_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_m))) svfloat16_t svmla_m(svbool_t, 
svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_x))) svfloat64_t svmla_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_x))) svfloat32_t svmla_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_x))) svfloat16_t svmla_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f64_z))) svfloat64_t svmla_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f32_z))) svfloat32_t svmla_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_f16_z))) svfloat16_t svmla_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_m))) svuint8_t svmla_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_m))) svuint32_t svmla_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_m))) svuint64_t svmla_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_m))) svuint16_t svmla_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_m))) svint8_t svmla_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_m))) svint32_t svmla_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_m))) svint64_t svmla_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_m))) svint16_t svmla_m(svbool_t, 
svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_x))) svuint8_t svmla_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_x))) svuint32_t svmla_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_x))) svuint64_t svmla_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_x))) svuint16_t svmla_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_x))) svint8_t svmla_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_x))) svint32_t svmla_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_x))) svint64_t svmla_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_x))) svint16_t svmla_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u8_z))) svuint8_t svmla_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u32_z))) svuint32_t svmla_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u64_z))) svuint64_t svmla_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_u16_z))) svuint16_t svmla_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s8_z))) svint8_t svmla_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s32_z))) svint32_t svmla_z(svbool_t, svint32_t, svint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s64_z))) svint64_t svmla_z(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_s16_z))) svint16_t svmla_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_m))) svfloat64_t svmla_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_m))) svfloat32_t svmla_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_m))) svfloat16_t svmla_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_x))) svfloat64_t svmla_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_x))) svfloat32_t svmla_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_x))) svfloat16_t svmla_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f64_z))) svfloat64_t svmla_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f32_z))) svfloat32_t svmla_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_f16_z))) svfloat16_t svmla_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_m))) svuint8_t svmla_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_m))) svuint32_t svmla_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_m))) svuint64_t svmla_m(svbool_t, svuint64_t, svuint64_t, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_m))) svuint16_t svmla_m(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_m))) svint8_t svmla_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_m))) svint32_t svmla_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_m))) svint64_t svmla_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_m))) svint16_t svmla_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_x))) svuint8_t svmla_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_x))) svuint32_t svmla_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_x))) svuint64_t svmla_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_x))) svuint16_t svmla_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_x))) svint8_t svmla_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_x))) svint32_t svmla_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_x))) svint64_t svmla_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_x))) svint16_t svmla_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u8_z))) svuint8_t svmla_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u32_z))) svuint32_t svmla_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u64_z))) svuint64_t svmla_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_u16_z))) svuint16_t svmla_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s8_z))) svint8_t svmla_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s32_z))) svint32_t svmla_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s64_z))) svint64_t svmla_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_s16_z))) svint16_t svmla_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f64))) svfloat64_t svmla_lane(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f32))) svfloat32_t svmla_lane(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_f16))) svfloat16_t svmla_lane(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_m))) svfloat64_t svmls_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_m))) svfloat32_t svmls_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_m))) svfloat16_t svmls_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_x))) svfloat64_t svmls_x(svbool_t, svfloat64_t, svfloat64_t, 
float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_x))) svfloat32_t svmls_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_x))) svfloat16_t svmls_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f64_z))) svfloat64_t svmls_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f32_z))) svfloat32_t svmls_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_f16_z))) svfloat16_t svmls_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_m))) svuint8_t svmls_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_m))) svuint32_t svmls_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_m))) svuint64_t svmls_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_m))) svuint16_t svmls_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_m))) svint8_t svmls_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_m))) svint32_t svmls_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_m))) svint64_t svmls_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_m))) svint16_t svmls_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_x))) svuint8_t svmls_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_x))) svuint32_t svmls_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_x))) svuint64_t svmls_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_x))) svuint16_t svmls_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_x))) svint8_t svmls_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_x))) svint32_t svmls_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_x))) svint64_t svmls_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_x))) svint16_t svmls_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u8_z))) svuint8_t svmls_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u32_z))) svuint32_t svmls_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u64_z))) svuint64_t svmls_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_u16_z))) svuint16_t svmls_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s8_z))) svint8_t svmls_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s32_z))) svint32_t svmls_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s64_z))) svint64_t svmls_z(svbool_t, svint64_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_s16_z))) svint16_t svmls_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_m))) svfloat64_t svmls_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_m))) svfloat32_t svmls_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_m))) svfloat16_t svmls_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_x))) svfloat64_t svmls_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_x))) svfloat32_t svmls_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_x))) svfloat16_t svmls_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f64_z))) svfloat64_t svmls_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f32_z))) svfloat32_t svmls_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_f16_z))) svfloat16_t svmls_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_m))) svuint8_t svmls_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_m))) svuint32_t svmls_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_m))) svuint64_t svmls_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_m))) svuint16_t svmls_m(svbool_t, svuint16_t, svuint16_t, 
svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_m))) svint8_t svmls_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_m))) svint32_t svmls_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_m))) svint64_t svmls_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_m))) svint16_t svmls_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_x))) svuint8_t svmls_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_x))) svuint32_t svmls_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_x))) svuint64_t svmls_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_x))) svuint16_t svmls_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_x))) svint8_t svmls_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_x))) svint32_t svmls_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_x))) svint64_t svmls_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_x))) svint16_t svmls_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u8_z))) svuint8_t svmls_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u32_z))) svuint32_t svmls_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u64_z))) svuint64_t svmls_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_u16_z))) svuint16_t svmls_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s8_z))) svint8_t svmls_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s32_z))) svint32_t svmls_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s64_z))) svint64_t svmls_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_s16_z))) svint16_t svmls_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f64))) svfloat64_t svmls_lane(svfloat64_t, svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f32))) svfloat32_t svmls_lane(svfloat32_t, svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_f16))) svfloat16_t svmls_lane(svfloat16_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmov_b_z))) svbool_t svmov_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_m))) svfloat64_t svmsb_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_m))) svfloat32_t svmsb_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_m))) svfloat16_t svmsb_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_x))) svfloat64_t svmsb_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_x))) svfloat32_t svmsb_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_x))) svfloat16_t svmsb_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f64_z))) svfloat64_t svmsb_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f32_z))) svfloat32_t svmsb_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_f16_z))) svfloat16_t svmsb_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_m))) svuint8_t svmsb_m(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_m))) svuint32_t svmsb_m(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_m))) svuint64_t svmsb_m(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_m))) svuint16_t svmsb_m(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_m))) svint8_t svmsb_m(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_m))) svint32_t svmsb_m(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_m))) svint64_t svmsb_m(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_m))) svint16_t svmsb_m(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_x))) svuint8_t svmsb_x(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_x))) svuint32_t svmsb_x(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_x))) svuint64_t svmsb_x(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_x))) svuint16_t svmsb_x(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_x))) svint8_t svmsb_x(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_x))) svint32_t svmsb_x(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_x))) svint64_t svmsb_x(svbool_t, svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_x))) svint16_t svmsb_x(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u8_z))) svuint8_t svmsb_z(svbool_t, svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u32_z))) svuint32_t svmsb_z(svbool_t, svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u64_z))) svuint64_t svmsb_z(svbool_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_u16_z))) svuint16_t svmsb_z(svbool_t, svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s8_z))) svint8_t svmsb_z(svbool_t, svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s32_z))) svint32_t svmsb_z(svbool_t, svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s64_z))) svint64_t svmsb_z(svbool_t, svint64_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_n_s16_z))) svint16_t svmsb_z(svbool_t, svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_m))) svfloat64_t svmsb_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_m))) svfloat32_t svmsb_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_m))) svfloat16_t svmsb_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_x))) svfloat64_t svmsb_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_x))) svfloat32_t svmsb_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_x))) svfloat16_t svmsb_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f64_z))) svfloat64_t svmsb_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f32_z))) svfloat32_t svmsb_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_f16_z))) svfloat16_t svmsb_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_m))) svuint8_t svmsb_m(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_m))) svuint32_t svmsb_m(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_m))) svuint64_t svmsb_m(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_m))) svuint16_t svmsb_m(svbool_t, svuint16_t, svuint16_t, 
svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_m))) svint8_t svmsb_m(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_m))) svint32_t svmsb_m(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_m))) svint64_t svmsb_m(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_m))) svint16_t svmsb_m(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_x))) svuint8_t svmsb_x(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_x))) svuint32_t svmsb_x(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_x))) svuint64_t svmsb_x(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_x))) svuint16_t svmsb_x(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_x))) svint8_t svmsb_x(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_x))) svint32_t svmsb_x(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_x))) svint64_t svmsb_x(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_x))) svint16_t svmsb_x(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u8_z))) svuint8_t svmsb_z(svbool_t, svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u32_z))) svuint32_t svmsb_z(svbool_t, svuint32_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u64_z))) svuint64_t svmsb_z(svbool_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_u16_z))) svuint16_t svmsb_z(svbool_t, svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s8_z))) svint8_t svmsb_z(svbool_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s32_z))) svint32_t svmsb_z(svbool_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s64_z))) svint64_t svmsb_z(svbool_t, svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmsb_s16_z))) svint16_t svmsb_z(svbool_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_m))) svfloat64_t svmul_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_m))) svfloat32_t svmul_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_m))) svfloat16_t svmul_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_x))) svfloat64_t svmul_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_x))) svfloat32_t svmul_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_x))) svfloat16_t svmul_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f64_z))) svfloat64_t svmul_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f32_z))) svfloat32_t svmul_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_f16_z))) svfloat16_t svmul_z(svbool_t, 
svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_m))) svuint8_t svmul_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_m))) svuint32_t svmul_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_m))) svuint64_t svmul_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_m))) svuint16_t svmul_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_m))) svint8_t svmul_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_m))) svint32_t svmul_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_m))) svint64_t svmul_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_m))) svint16_t svmul_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_x))) svuint8_t svmul_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_x))) svuint32_t svmul_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_x))) svuint64_t svmul_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_x))) svuint16_t svmul_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_x))) svint8_t svmul_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_x))) svint32_t svmul_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_x))) svint64_t svmul_x(svbool_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_x))) svint16_t svmul_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u8_z))) svuint8_t svmul_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u32_z))) svuint32_t svmul_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u64_z))) svuint64_t svmul_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_u16_z))) svuint16_t svmul_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s8_z))) svint8_t svmul_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s32_z))) svint32_t svmul_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s64_z))) svint64_t svmul_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_s16_z))) svint16_t svmul_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_m))) svfloat64_t svmul_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_m))) svfloat32_t svmul_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_m))) svfloat16_t svmul_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x))) svfloat64_t svmul_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x))) svfloat32_t svmul_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x))) svfloat16_t svmul_x(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_z))) svfloat64_t svmul_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_z))) svfloat32_t svmul_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_z))) svfloat16_t svmul_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_m))) svuint8_t svmul_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_m))) svuint32_t svmul_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_m))) svuint64_t svmul_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_m))) svuint16_t svmul_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_m))) svint8_t svmul_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_m))) svint32_t svmul_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_m))) svint64_t svmul_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_m))) svint16_t svmul_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_x))) svuint8_t svmul_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_x))) svuint32_t svmul_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_x))) svuint64_t svmul_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_x))) svuint16_t svmul_x(svbool_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_x))) svint8_t svmul_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_x))) svint32_t svmul_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_x))) svint64_t svmul_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_x))) svint16_t svmul_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u8_z))) svuint8_t svmul_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u32_z))) svuint32_t svmul_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u64_z))) svuint64_t svmul_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_u16_z))) svuint16_t svmul_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s8_z))) svint8_t svmul_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s32_z))) svint32_t svmul_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s64_z))) svint64_t svmul_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_s16_z))) svint16_t svmul_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f64))) svfloat64_t svmul_lane(svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f32))) svfloat32_t svmul_lane(svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_f16))) svfloat16_t svmul_lane(svfloat16_t, svfloat16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_m))) svint8_t svmulh_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_m))) svint32_t svmulh_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_m))) svint64_t svmulh_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_m))) svint16_t svmulh_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_x))) svint8_t svmulh_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_x))) svint32_t svmulh_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_x))) svint64_t svmulh_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_x))) svint16_t svmulh_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s8_z))) svint8_t svmulh_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s32_z))) svint32_t svmulh_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s64_z))) svint64_t svmulh_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_s16_z))) svint16_t svmulh_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_m))) svuint8_t svmulh_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_m))) svuint32_t svmulh_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_m))) svuint64_t svmulh_m(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_m))) svuint16_t svmulh_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_x))) svuint8_t svmulh_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_x))) svuint32_t svmulh_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_x))) svuint64_t svmulh_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_x))) svuint16_t svmulh_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u8_z))) svuint8_t svmulh_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u32_z))) svuint32_t svmulh_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u64_z))) svuint64_t svmulh_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_n_u16_z))) svuint16_t svmulh_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_m))) svint8_t svmulh_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_m))) svint32_t svmulh_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_m))) svint64_t svmulh_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_m))) svint16_t svmulh_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_x))) svint8_t svmulh_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_x))) svint32_t svmulh_x(svbool_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_x))) svint64_t svmulh_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_x))) svint16_t svmulh_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s8_z))) svint8_t svmulh_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s32_z))) svint32_t svmulh_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s64_z))) svint64_t svmulh_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_s16_z))) svint16_t svmulh_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_m))) svuint8_t svmulh_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_m))) svuint32_t svmulh_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_m))) svuint64_t svmulh_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_m))) svuint16_t svmulh_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_x))) svuint8_t svmulh_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_x))) svuint32_t svmulh_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_x))) svuint64_t svmulh_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_x))) svuint16_t svmulh_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u8_z))) svuint8_t svmulh_z(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u32_z))) svuint32_t svmulh_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u64_z))) svuint64_t svmulh_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulh_u16_z))) svuint16_t svmulh_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_m))) svfloat64_t svmulx_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_m))) svfloat32_t svmulx_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_m))) svfloat16_t svmulx_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_x))) svfloat64_t svmulx_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_x))) svfloat32_t svmulx_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_x))) svfloat16_t svmulx_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f64_z))) svfloat64_t svmulx_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f32_z))) svfloat32_t svmulx_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_n_f16_z))) svfloat16_t svmulx_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_m))) svfloat64_t svmulx_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_m))) svfloat32_t svmulx_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_m))) svfloat16_t svmulx_m(svbool_t, svfloat16_t, 
svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_x))) svfloat64_t svmulx_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_x))) svfloat32_t svmulx_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_x))) svfloat16_t svmulx_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f64_z))) svfloat64_t svmulx_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f32_z))) svfloat32_t svmulx_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmulx_f16_z))) svfloat16_t svmulx_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnand_b_z))) svbool_t svnand_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_m))) svfloat64_t svneg_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_m))) svfloat32_t svneg_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_m))) svfloat16_t svneg_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_x))) svfloat64_t svneg_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_x))) svfloat32_t svneg_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_x))) svfloat16_t svneg_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f64_z))) svfloat64_t svneg_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f32_z))) svfloat32_t svneg_z(svbool_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_f16_z))) svfloat16_t svneg_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_m))) svint8_t svneg_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_m))) svint32_t svneg_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_m))) svint64_t svneg_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_m))) svint16_t svneg_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_x))) svint8_t svneg_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_x))) svint32_t svneg_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_x))) svint64_t svneg_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_x))) svint16_t svneg_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s8_z))) svint8_t svneg_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s32_z))) svint32_t svneg_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s64_z))) svint64_t svneg_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svneg_s16_z))) svint16_t svneg_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_m))) svfloat64_t svnmad_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_m))) svfloat32_t svnmad_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_m))) svfloat16_t svnmad_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_x))) svfloat64_t svnmad_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_x))) svfloat32_t svnmad_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_x))) svfloat16_t svnmad_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f64_z))) svfloat64_t svnmad_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f32_z))) svfloat32_t svnmad_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_n_f16_z))) svfloat16_t svnmad_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_m))) svfloat64_t svnmad_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_m))) svfloat32_t svnmad_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_m))) svfloat16_t svnmad_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_x))) svfloat64_t svnmad_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_x))) svfloat32_t svnmad_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_x))) svfloat16_t svnmad_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f64_z))) svfloat64_t svnmad_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f32_z))) 
svfloat32_t svnmad_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmad_f16_z))) svfloat16_t svnmad_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_m))) svfloat64_t svnmla_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_m))) svfloat32_t svnmla_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_m))) svfloat16_t svnmla_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_x))) svfloat64_t svnmla_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_x))) svfloat32_t svnmla_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_x))) svfloat16_t svnmla_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f64_z))) svfloat64_t svnmla_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f32_z))) svfloat32_t svnmla_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_n_f16_z))) svfloat16_t svnmla_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_m))) svfloat64_t svnmla_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_m))) svfloat32_t svnmla_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_m))) svfloat16_t svnmla_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_x))) svfloat64_t svnmla_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_x))) svfloat32_t svnmla_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_x))) svfloat16_t svnmla_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f64_z))) svfloat64_t svnmla_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f32_z))) svfloat32_t svnmla_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmla_f16_z))) svfloat16_t svnmla_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_m))) svfloat64_t svnmls_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_m))) svfloat32_t svnmls_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_m))) svfloat16_t svnmls_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_x))) svfloat64_t svnmls_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_x))) svfloat32_t svnmls_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_x))) svfloat16_t svnmls_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f64_z))) svfloat64_t svnmls_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f32_z))) svfloat32_t svnmls_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_n_f16_z))) svfloat16_t svnmls_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_m))) svfloat64_t svnmls_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_m))) svfloat32_t svnmls_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_m))) svfloat16_t svnmls_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_x))) svfloat64_t svnmls_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_x))) svfloat32_t svnmls_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_x))) svfloat16_t svnmls_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f64_z))) svfloat64_t svnmls_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f32_z))) svfloat32_t svnmls_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmls_f16_z))) svfloat16_t svnmls_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_m))) svfloat64_t svnmsb_m(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_m))) svfloat32_t svnmsb_m(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_m))) 
svfloat16_t svnmsb_m(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_x))) svfloat64_t svnmsb_x(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_x))) svfloat32_t svnmsb_x(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_x))) svfloat16_t svnmsb_x(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f64_z))) svfloat64_t svnmsb_z(svbool_t, svfloat64_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f32_z))) svfloat32_t svnmsb_z(svbool_t, svfloat32_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_n_f16_z))) svfloat16_t svnmsb_z(svbool_t, svfloat16_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_m))) svfloat64_t svnmsb_m(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_m))) svfloat32_t svnmsb_m(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_m))) svfloat16_t svnmsb_m(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_x))) svfloat64_t svnmsb_x(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_x))) svfloat32_t svnmsb_x(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_x))) svfloat16_t svnmsb_x(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f64_z))) svfloat64_t svnmsb_z(svbool_t, svfloat64_t, svfloat64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f32_z))) svfloat32_t svnmsb_z(svbool_t, svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmsb_f16_z))) svfloat16_t svnmsb_z(svbool_t, svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnor_b_z))) svbool_t svnor_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_b_z))) svbool_t svnot_z(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_m))) svuint8_t svnot_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_m))) svuint32_t svnot_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_m))) svuint64_t svnot_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_m))) svuint16_t svnot_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_m))) svint8_t svnot_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_m))) svint32_t svnot_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_m))) svint64_t svnot_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_m))) svint16_t svnot_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_x))) svuint8_t svnot_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_x))) svuint32_t svnot_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_x))) svuint64_t svnot_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_x))) svuint16_t svnot_x(svbool_t, 
svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_x))) svint8_t svnot_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_x))) svint32_t svnot_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_x))) svint64_t svnot_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_x))) svint16_t svnot_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u8_z))) svuint8_t svnot_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u32_z))) svuint32_t svnot_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u64_z))) svuint64_t svnot_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_u16_z))) svuint16_t svnot_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s8_z))) svint8_t svnot_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s32_z))) svint32_t svnot_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s64_z))) svint64_t svnot_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnot_s16_z))) svint16_t svnot_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorn_b_z))) svbool_t svorn_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_b_z))) svbool_t svorr_z(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_m))) svuint8_t svorr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_m))) svuint32_t svorr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_m))) svuint64_t 
svorr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_m))) svuint16_t svorr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_m))) svint8_t svorr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_m))) svint32_t svorr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_m))) svint64_t svorr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_m))) svint16_t svorr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_x))) svuint8_t svorr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_x))) svuint32_t svorr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_x))) svuint64_t svorr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_x))) svuint16_t svorr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_x))) svint8_t svorr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_x))) svint32_t svorr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_x))) svint64_t svorr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_x))) svint16_t svorr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u8_z))) svuint8_t svorr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u32_z))) svuint32_t svorr_z(svbool_t, svuint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u64_z))) svuint64_t svorr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_u16_z))) svuint16_t svorr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s8_z))) svint8_t svorr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s32_z))) svint32_t svorr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s64_z))) svint64_t svorr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_n_s16_z))) svint16_t svorr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_m))) svuint8_t svorr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_m))) svuint32_t svorr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_m))) svuint64_t svorr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_m))) svuint16_t svorr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_m))) svint8_t svorr_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_m))) svint32_t svorr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_m))) svint64_t svorr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_m))) svint16_t svorr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_x))) svuint8_t svorr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_x))) svuint32_t 
svorr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_x))) svuint64_t svorr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_x))) svuint16_t svorr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_x))) svint8_t svorr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_x))) svint32_t svorr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_x))) svint64_t svorr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_x))) svint16_t svorr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u8_z))) svuint8_t svorr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u32_z))) svuint32_t svorr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u64_z))) svuint64_t svorr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_u16_z))) svuint16_t svorr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s8_z))) svint8_t svorr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s32_z))) svint32_t svorr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s64_z))) svint64_t svorr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorr_s16_z))) svint16_t svorr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u8))) uint8_t svorv(svbool_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u32))) uint32_t svorv(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u64))) uint64_t svorv(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_u16))) uint16_t svorv(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s8))) int8_t svorv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s32))) int32_t svorv(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s64))) int64_t svorv(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorv_s16))) int16_t svorv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_b))) svbool_t svpfalse(void); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfirst_b))) svbool_t svpfirst(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base))) void svprfb_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base))) void svprfb_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base_offset))) void svprfb_gather_offset(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base_offset))) void svprfb_gather_offset(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s32offset))) void svprfb_gather_offset(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32offset))) void svprfb_gather_offset(svbool_t, void const *, svuint32_t, enum svprfop); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s64offset))) void svprfb_gather_offset(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64offset))) void svprfb_gather_offset(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base))) void svprfd_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base))) void svprfd_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base_index))) void svprfd_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base_index))) void svprfd_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s32index))) void svprfd_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32index))) void svprfd_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s64index))) void svprfd_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64index))) void svprfd_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base))) void svprfh_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base))) void svprfh_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base_index))) void svprfh_gather_index(svbool_t, 
svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base_index))) void svprfh_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s32index))) void svprfh_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32index))) void svprfh_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s64index))) void svprfh_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64index))) void svprfh_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base))) void svprfw_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base))) void svprfw_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base_index))) void svprfw_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base_index))) void svprfw_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s32index))) void svprfw_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32index))) void svprfw_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s64index))) void svprfw_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64index))) void svprfw_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8))) svint8_t svqadd(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32))) svint32_t svqadd(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64))) svint64_t svqadd(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16))) svint16_t svqadd(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8))) svuint8_t svqadd(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32))) svuint32_t svqadd(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64))) svuint64_t svqadd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16))) svuint16_t svqadd(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8))) svint8_t svqadd(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32))) svint32_t svqadd(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64))) svint64_t svqadd(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16))) svint16_t svqadd(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8))) svuint8_t svqadd(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32))) svuint32_t svqadd(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64))) svuint64_t svqadd(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16))) svuint16_t svqadd(svuint16_t, 
svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s32))) int32_t svqdecb(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_s64))) int64_t svqdecb(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u32))) uint32_t svqdecb(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u64))) uint64_t svqdecb(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s32))) int32_t svqdecb_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s64))) int64_t svqdecb_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u32))) uint32_t svqdecb_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u64))) uint64_t svqdecb_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s32))) int32_t svqdecd(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s64))) int64_t svqdecd(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u32))) uint32_t svqdecd(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_u64))) uint64_t svqdecd(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_s64))) svint64_t svqdecd(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_u64))) svuint64_t svqdecd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s32))) int32_t svqdecd_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s64))) int64_t svqdecd_pat(int64_t, enum svpattern, uint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u32))) uint32_t svqdecd_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u64))) uint64_t svqdecd_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_s64))) svint64_t svqdecd_pat(svint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_u64))) svuint64_t svqdecd_pat(svuint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s32))) int32_t svqdech(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s64))) int64_t svqdech(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u32))) uint32_t svqdech(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_u64))) uint64_t svqdech(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_s16))) svint16_t svqdech(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_u16))) svuint16_t svqdech(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s32))) int32_t svqdech_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s64))) int64_t svqdech_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u32))) uint32_t svqdech_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u64))) uint64_t svqdech_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_s16))) svint16_t svqdech_pat(svint16_t, enum svpattern, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_u16))) svuint16_t svqdech_pat(svuint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b8))) int32_t svqdecp_b8(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b32))) int32_t svqdecp_b32(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b64))) int32_t svqdecp_b64(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b16))) int32_t svqdecp_b16(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b8))) int64_t svqdecp_b8(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b32))) int64_t svqdecp_b32(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b64))) int64_t svqdecp_b64(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s64_b16))) int64_t svqdecp_b16(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b8))) uint32_t svqdecp_b8(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b32))) uint32_t svqdecp_b32(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b64))) uint32_t svqdecp_b64(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u32_b16))) uint32_t svqdecp_b16(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b8))) uint64_t svqdecp_b8(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b32))) uint64_t svqdecp_b32(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b64))) uint64_t svqdecp_b64(uint64_t, svbool_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_u64_b16))) uint64_t svqdecp_b16(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s32))) svint32_t svqdecp(svint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s64))) svint64_t svqdecp(svint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_s16))) svint16_t svqdecp(svint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u32))) svuint32_t svqdecp(svuint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u64))) svuint64_t svqdecp(svuint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_u16))) svuint16_t svqdecp(svuint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s32))) int32_t svqdecw(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_s64))) int64_t svqdecw(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u32))) uint32_t svqdecw(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_n_u64))) uint64_t svqdecw(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_s32))) svint32_t svqdecw(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_u32))) svuint32_t svqdecw(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s32))) int32_t svqdecw_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s64))) int64_t svqdecw_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u32))) uint32_t svqdecw_pat(uint32_t, enum svpattern, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u64))) uint64_t svqdecw_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_s32))) svint32_t svqdecw_pat(svint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_u32))) svuint32_t svqdecw_pat(svuint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s32))) int32_t svqincb(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s64))) int64_t svqincb(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u32))) uint32_t svqincb(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u64))) uint64_t svqincb(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s32))) int32_t svqincb_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s64))) int64_t svqincb_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u32))) uint32_t svqincb_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u64))) uint64_t svqincb_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s32))) int32_t svqincd(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s64))) int64_t svqincd(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u32))) uint32_t svqincd(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_u64))) uint64_t svqincd(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_s64))) svint64_t svqincd(svint64_t, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_u64))) svuint64_t svqincd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s32))) int32_t svqincd_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s64))) int64_t svqincd_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u32))) uint32_t svqincd_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u64))) uint64_t svqincd_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_s64))) svint64_t svqincd_pat(svint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_u64))) svuint64_t svqincd_pat(svuint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s32))) int32_t svqinch(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s64))) int64_t svqinch(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u32))) uint32_t svqinch(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_u64))) uint64_t svqinch(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_s16))) svint16_t svqinch(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_u16))) svuint16_t svqinch(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s32))) int32_t svqinch_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s64))) int64_t svqinch_pat(int64_t, enum svpattern, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u32))) uint32_t svqinch_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u64))) uint64_t svqinch_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_s16))) svint16_t svqinch_pat(svint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_u16))) svuint16_t svqinch_pat(svuint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b8))) int32_t svqincp_b8(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b32))) int32_t svqincp_b32(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b64))) int32_t svqincp_b64(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b16))) int32_t svqincp_b16(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b8))) int64_t svqincp_b8(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b32))) int64_t svqincp_b32(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b64))) int64_t svqincp_b64(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s64_b16))) int64_t svqincp_b16(int64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b8))) uint32_t svqincp_b8(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b32))) uint32_t svqincp_b32(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b64))) uint32_t svqincp_b64(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u32_b16))) uint32_t 
svqincp_b16(uint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b8))) uint64_t svqincp_b8(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b32))) uint64_t svqincp_b32(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b64))) uint64_t svqincp_b64(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_u64_b16))) uint64_t svqincp_b16(uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s32))) svint32_t svqincp(svint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s64))) svint64_t svqincp(svint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_s16))) svint16_t svqincp(svint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u32))) svuint32_t svqincp(svuint32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u64))) svuint64_t svqincp(svuint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_u16))) svuint16_t svqincp(svuint16_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s32))) int32_t svqincw(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_s64))) int64_t svqincw(int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u32))) uint32_t svqincw(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_n_u64))) uint64_t svqincw(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_s32))) svint32_t svqincw(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_u32))) svuint32_t svqincw(svuint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s32))) int32_t svqincw_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s64))) int64_t svqincw_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u32))) uint32_t svqincw_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u64))) uint64_t svqincw_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_s32))) svint32_t svqincw_pat(svint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_u32))) svuint32_t svqincw_pat(svuint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8))) svint8_t svqsub(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32))) svint32_t svqsub(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64))) svint64_t svqsub(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16))) svint16_t svqsub(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8))) svuint8_t svqsub(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32))) svuint32_t svqsub(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64))) svuint64_t svqsub(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16))) svuint16_t svqsub(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8))) svint8_t svqsub(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32))) svint32_t svqsub(svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64))) svint64_t svqsub(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16))) svint16_t svqsub(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8))) svuint8_t svqsub(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32))) svuint32_t svqsub(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64))) svuint64_t svqsub(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16))) svuint16_t svqsub(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_m))) svuint8_t svrbit_m(svuint8_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_m))) svuint32_t svrbit_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_m))) svuint64_t svrbit_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_m))) svuint16_t svrbit_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_m))) svint8_t svrbit_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_m))) svint32_t svrbit_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_m))) svint64_t svrbit_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_m))) svint16_t svrbit_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_x))) svuint8_t svrbit_x(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_x))) svuint32_t svrbit_x(svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_x))) svuint64_t svrbit_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_x))) svuint16_t svrbit_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_x))) svint8_t svrbit_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_x))) svint32_t svrbit_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_x))) svint64_t svrbit_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_x))) svint16_t svrbit_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u8_z))) svuint8_t svrbit_z(svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u32_z))) svuint32_t svrbit_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u64_z))) svuint64_t svrbit_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_u16_z))) svuint16_t svrbit_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s8_z))) svint8_t svrbit_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s32_z))) svint32_t svrbit_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s64_z))) svint64_t svrbit_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrbit_s16_z))) svint16_t svrbit_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f64))) svfloat64_t svrecpe(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f32))) svfloat32_t svrecpe(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_f16))) svfloat16_t svrecpe(svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f64))) svfloat64_t svrecps(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f32))) svfloat32_t svrecps(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecps_f16))) svfloat16_t svrecps(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_m))) svfloat64_t svrecpx_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_m))) svfloat32_t svrecpx_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_m))) svfloat16_t svrecpx_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_x))) svfloat64_t svrecpx_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_x))) svfloat32_t svrecpx_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_x))) svfloat16_t svrecpx_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f64_z))) svfloat64_t svrecpx_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f32_z))) svfloat32_t svrecpx_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpx_f16_z))) svfloat16_t svrecpx_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u8))) svuint8_t svrev(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u32))) svuint32_t svrev(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u64))) svuint64_t svrev(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u16))) svuint16_t svrev(svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s8))) svint8_t svrev(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f64))) svfloat64_t svrev(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f32))) svfloat32_t svrev(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f16))) svfloat16_t svrev(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s32))) svint32_t svrev(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s64))) svint64_t svrev(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s16))) svint16_t svrev(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_m))) svuint32_t svrevb_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_m))) svuint64_t svrevb_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_m))) svuint16_t svrevb_m(svuint16_t, svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_m))) svint32_t svrevb_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_m))) svint64_t svrevb_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_m))) svint16_t svrevb_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_x))) svuint32_t svrevb_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_x))) svuint64_t svrevb_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_x))) svuint16_t svrevb_x(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_x))) svint32_t svrevb_x(svbool_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_x))) svint64_t svrevb_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_x))) svint16_t svrevb_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u32_z))) svuint32_t svrevb_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u64_z))) svuint64_t svrevb_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_u16_z))) svuint16_t svrevb_z(svbool_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s32_z))) svint32_t svrevb_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s64_z))) svint64_t svrevb_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevb_s16_z))) svint16_t svrevb_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_m))) svuint32_t svrevh_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_m))) svuint64_t svrevh_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_m))) svint32_t svrevh_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_m))) svint64_t svrevh_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_x))) svuint32_t svrevh_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_x))) svuint64_t svrevh_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_x))) svint32_t svrevh_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_x))) svint64_t svrevh_x(svbool_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u32_z))) svuint32_t svrevh_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_u64_z))) svuint64_t svrevh_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s32_z))) svint32_t svrevh_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevh_s64_z))) svint64_t svrevh_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_m))) svuint64_t svrevw_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_m))) svint64_t svrevw_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_x))) svuint64_t svrevw_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_x))) svint64_t svrevw_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_u64_z))) svuint64_t svrevw_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevw_s64_z))) svint64_t svrevw_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_m))) svfloat64_t svrinta_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_m))) svfloat32_t svrinta_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_m))) svfloat16_t svrinta_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_x))) svfloat64_t svrinta_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_x))) svfloat32_t svrinta_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_x))) svfloat16_t svrinta_x(svbool_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f64_z))) svfloat64_t svrinta_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f32_z))) svfloat32_t svrinta_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinta_f16_z))) svfloat16_t svrinta_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_m))) svfloat64_t svrinti_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_m))) svfloat32_t svrinti_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_m))) svfloat16_t svrinti_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_x))) svfloat64_t svrinti_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_x))) svfloat32_t svrinti_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_x))) svfloat16_t svrinti_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f64_z))) svfloat64_t svrinti_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f32_z))) svfloat32_t svrinti_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrinti_f16_z))) svfloat16_t svrinti_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_m))) svfloat64_t svrintm_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_m))) svfloat32_t svrintm_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_m))) svfloat16_t svrintm_m(svfloat16_t, svbool_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_x))) svfloat64_t svrintm_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_x))) svfloat32_t svrintm_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_x))) svfloat16_t svrintm_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f64_z))) svfloat64_t svrintm_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f32_z))) svfloat32_t svrintm_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintm_f16_z))) svfloat16_t svrintm_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_m))) svfloat64_t svrintn_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_m))) svfloat32_t svrintn_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_m))) svfloat16_t svrintn_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_x))) svfloat64_t svrintn_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_x))) svfloat32_t svrintn_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_x))) svfloat16_t svrintn_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f64_z))) svfloat64_t svrintn_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f32_z))) svfloat32_t svrintn_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintn_f16_z))) svfloat16_t svrintn_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_m))) svfloat64_t 
svrintp_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_m))) svfloat32_t svrintp_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_m))) svfloat16_t svrintp_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_x))) svfloat64_t svrintp_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_x))) svfloat32_t svrintp_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_x))) svfloat16_t svrintp_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f64_z))) svfloat64_t svrintp_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f32_z))) svfloat32_t svrintp_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintp_f16_z))) svfloat16_t svrintp_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_m))) svfloat64_t svrintx_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_m))) svfloat32_t svrintx_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_m))) svfloat16_t svrintx_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_x))) svfloat64_t svrintx_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_x))) svfloat32_t svrintx_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_x))) svfloat16_t svrintx_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f64_z))) svfloat64_t svrintx_z(svbool_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f32_z))) svfloat32_t svrintx_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintx_f16_z))) svfloat16_t svrintx_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_m))) svfloat64_t svrintz_m(svfloat64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_m))) svfloat32_t svrintz_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_m))) svfloat16_t svrintz_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_x))) svfloat64_t svrintz_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_x))) svfloat32_t svrintz_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_x))) svfloat16_t svrintz_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f64_z))) svfloat64_t svrintz_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f32_z))) svfloat32_t svrintz_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrintz_f16_z))) svfloat16_t svrintz_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f64))) svfloat64_t svrsqrte(svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f32))) svfloat32_t svrsqrte(svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_f16))) svfloat16_t svrsqrte(svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f64))) svfloat64_t svrsqrts(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f32))) svfloat32_t svrsqrts(svfloat32_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrts_f16))) svfloat16_t svrsqrts(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_m))) svfloat64_t svscale_m(svbool_t, svfloat64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_m))) svfloat32_t svscale_m(svbool_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_m))) svfloat16_t svscale_m(svbool_t, svfloat16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_x))) svfloat64_t svscale_x(svbool_t, svfloat64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_x))) svfloat32_t svscale_x(svbool_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_x))) svfloat16_t svscale_x(svbool_t, svfloat16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f64_z))) svfloat64_t svscale_z(svbool_t, svfloat64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f32_z))) svfloat32_t svscale_z(svbool_t, svfloat32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_f16_z))) svfloat16_t svscale_z(svbool_t, svfloat16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_m))) svfloat64_t svscale_m(svbool_t, svfloat64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_m))) svfloat32_t svscale_m(svbool_t, svfloat32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_m))) svfloat16_t svscale_m(svbool_t, svfloat16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x))) svfloat64_t svscale_x(svbool_t, svfloat64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x))) svfloat32_t svscale_x(svbool_t, svfloat32_t, 
svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x))) svfloat16_t svscale_x(svbool_t, svfloat16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_z))) svfloat64_t svscale_z(svbool_t, svfloat64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_z))) svfloat32_t svscale_z(svbool_t, svfloat32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_z))) svfloat16_t svscale_z(svbool_t, svfloat16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_b))) svbool_t svsel(svbool_t, svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8))) svuint8_t svsel(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u32))) svuint32_t svsel(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64))) svuint64_t svsel(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16))) svuint16_t svsel(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8))) svint8_t svsel(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64))) svfloat64_t svsel(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f32))) svfloat32_t svsel(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f16))) svfloat16_t svsel(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32))) svint32_t svsel(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64))) svint64_t svsel(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16))) 
svint16_t svsel(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u8))) svuint8x2_t svset2(svuint8x2_t, uint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u32))) svuint32x2_t svset2(svuint32x2_t, uint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u64))) svuint64x2_t svset2(svuint64x2_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u16))) svuint16x2_t svset2(svuint16x2_t, uint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s8))) svint8x2_t svset2(svint8x2_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f64))) svfloat64x2_t svset2(svfloat64x2_t, uint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f32))) svfloat32x2_t svset2(svfloat32x2_t, uint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f16))) svfloat16x2_t svset2(svfloat16x2_t, uint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s32))) svint32x2_t svset2(svint32x2_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s64))) svint64x2_t svset2(svint64x2_t, uint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s16))) svint16x2_t svset2(svint16x2_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u8))) svuint8x3_t svset3(svuint8x3_t, uint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u32))) svuint32x3_t svset3(svuint32x3_t, uint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u64))) svuint64x3_t svset3(svuint64x3_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u16))) svuint16x3_t svset3(svuint16x3_t, 
uint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s8))) svint8x3_t svset3(svint8x3_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f64))) svfloat64x3_t svset3(svfloat64x3_t, uint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f32))) svfloat32x3_t svset3(svfloat32x3_t, uint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f16))) svfloat16x3_t svset3(svfloat16x3_t, uint64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s32))) svint32x3_t svset3(svint32x3_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s64))) svint64x3_t svset3(svint64x3_t, uint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s16))) svint16x3_t svset3(svint16x3_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u8))) svuint8x4_t svset4(svuint8x4_t, uint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u32))) svuint32x4_t svset4(svuint32x4_t, uint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u64))) svuint64x4_t svset4(svuint64x4_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u16))) svuint16x4_t svset4(svuint16x4_t, uint64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s8))) svint8x4_t svset4(svint8x4_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f64))) svfloat64x4_t svset4(svfloat64x4_t, uint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f32))) svfloat32x4_t svset4(svfloat32x4_t, uint64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f16))) svfloat16x4_t svset4(svfloat16x4_t, uint64_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s32))) svint32x4_t svset4(svint32x4_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s64))) svint64x4_t svset4(svint64x4_t, uint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s16))) svint16x4_t svset4(svint16x4_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u8))) svuint8_t svsplice(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u32))) svuint32_t svsplice(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u64))) svuint64_t svsplice(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u16))) svuint16_t svsplice(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s8))) svint8_t svsplice(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f64))) svfloat64_t svsplice(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f32))) svfloat32_t svsplice(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f16))) svfloat16_t svsplice(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s32))) svint32_t svsplice(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s64))) svint64_t svsplice(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s16))) svint16_t svsplice(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_m))) svfloat64_t svsqrt_m(svfloat64_t, svbool_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_m))) svfloat32_t svsqrt_m(svfloat32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_m))) svfloat16_t svsqrt_m(svfloat16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_x))) svfloat64_t svsqrt_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_x))) svfloat32_t svsqrt_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_x))) svfloat16_t svsqrt_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f64_z))) svfloat64_t svsqrt_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f32_z))) svfloat32_t svsqrt_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqrt_f16_z))) svfloat16_t svsqrt_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8))) void svst1(svbool_t, uint8_t *, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32))) void svst1(svbool_t, uint32_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64))) void svst1(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16))) void svst1(svbool_t, uint16_t *, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8))) void svst1(svbool_t, int8_t *, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64))) void svst1(svbool_t, float64_t *, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32))) void svst1(svbool_t, float32_t *, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16))) void svst1(svbool_t, float16_t *, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32))) void svst1(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64))) void svst1(svbool_t, int64_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16))) void svst1(svbool_t, int16_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_u32))) void svst1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_u64))) void svst1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_f64))) void svst1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_f32))) void svst1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_index_s32))) void svst1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_index_s64))) void svst1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_u32))) void svst1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_u64))) void svst1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_f64))) void svst1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_f32))) void svst1_scatter_offset(svbool_t, svuint32_t, int64_t, 
svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_offset_s32))) void svst1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_offset_s64))) void svst1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_u32))) void svst1_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_u64))) void svst1_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_f64))) void svst1_scatter(svbool_t, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_f32))) void svst1_scatter(svbool_t, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32base_s32))) void svst1_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64base_s64))) void svst1_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_u32))) void svst1_scatter_index(svbool_t, uint32_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_f32))) void svst1_scatter_index(svbool_t, float32_t *, svint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32index_s32))) void svst1_scatter_index(svbool_t, int32_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_u32))) void svst1_scatter_index(svbool_t, uint32_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_f32))) void svst1_scatter_index(svbool_t, float32_t *, 
svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32index_s32))) void svst1_scatter_index(svbool_t, int32_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_u64))) void svst1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_f64))) void svst1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64index_s64))) void svst1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_u64))) void svst1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_f64))) void svst1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64index_s64))) void svst1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_u32))) void svst1_scatter_offset(svbool_t, uint32_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_f32))) void svst1_scatter_offset(svbool_t, float32_t *, svint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s32offset_s32))) void svst1_scatter_offset(svbool_t, int32_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_u32))) void svst1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_f32))) void svst1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u32offset_s32))) void svst1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_u64))) void svst1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_f64))) void svst1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_s64offset_s64))) void svst1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_u64))) void svst1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_f64))) void svst1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_scatter_u64offset_s64))) void svst1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8))) void svst1_vnum(svbool_t, uint8_t *, int64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32))) void svst1_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64))) void svst1_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16))) void svst1_vnum(svbool_t, uint16_t *, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8))) void svst1_vnum(svbool_t, int8_t *, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64))) void svst1_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32))) void svst1_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16))) void svst1_vnum(svbool_t, float16_t *, int64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32))) void svst1_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64))) void svst1_vnum(svbool_t, int64_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16))) void svst1_vnum(svbool_t, int16_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s32))) void svst1b(svbool_t, int8_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s64))) void svst1b(svbool_t, int8_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s16))) void svst1b(svbool_t, int8_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u32))) void svst1b(svbool_t, uint8_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u64))) void svst1b(svbool_t, uint8_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_u16))) void svst1b(svbool_t, uint8_t *, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_u32))) void svst1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_u64))) void svst1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_offset_s32))) void svst1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_offset_s64))) void svst1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_u32))) void svst1b_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_u64))) void svst1b_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32base_s32))) void svst1b_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64base_s64))) void svst1b_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_s32))) void svst1b_scatter_offset(svbool_t, int8_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s32offset_u32))) void svst1b_scatter_offset(svbool_t, uint8_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_s32))) void svst1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u32offset_u32))) void svst1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_s64))) void svst1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_s64offset_u64))) void svst1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_s64))) void svst1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_scatter_u64offset_u64))) void 
svst1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s32))) void svst1b_vnum(svbool_t, int8_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s64))) void svst1b_vnum(svbool_t, int8_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_s16))) void svst1b_vnum(svbool_t, int8_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u32))) void svst1b_vnum(svbool_t, uint8_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u64))) void svst1b_vnum(svbool_t, uint8_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_vnum_u16))) void svst1b_vnum(svbool_t, uint8_t *, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s32))) void svst1h(svbool_t, int16_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_s64))) void svst1h(svbool_t, int16_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u32))) void svst1h(svbool_t, uint16_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_u64))) void svst1h(svbool_t, uint16_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_u32))) void svst1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_u64))) void svst1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_index_s32))) void svst1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_index_s64))) void 
svst1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_u32))) void svst1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_u64))) void svst1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_offset_s32))) void svst1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_offset_s64))) void svst1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_u32))) void svst1h_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_u64))) void svst1h_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32base_s32))) void svst1h_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64base_s64))) void svst1h_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_s32))) void svst1h_scatter_index(svbool_t, int16_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32index_u32))) void svst1h_scatter_index(svbool_t, uint16_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_s32))) void svst1h_scatter_index(svbool_t, int16_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32index_u32))) void svst1h_scatter_index(svbool_t, uint16_t *, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_s64))) void svst1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64index_u64))) void svst1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_s64))) void svst1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64index_u64))) void svst1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_s32))) void svst1h_scatter_offset(svbool_t, int16_t *, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s32offset_u32))) void svst1h_scatter_offset(svbool_t, uint16_t *, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_s32))) void svst1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u32offset_u32))) void svst1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_s64))) void svst1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_s64offset_u64))) void svst1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_s64))) void svst1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_scatter_u64offset_u64))) void svst1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s32))) void svst1h_vnum(svbool_t, int16_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_s64))) void svst1h_vnum(svbool_t, int16_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u32))) void svst1h_vnum(svbool_t, uint16_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1h_vnum_u64))) void svst1h_vnum(svbool_t, uint16_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_s64))) void svst1w(svbool_t, int32_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_u64))) void svst1w(svbool_t, uint32_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_u64))) void svst1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_index_s64))) void svst1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_u64))) void svst1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_offset_s64))) void svst1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_u64))) void svst1w_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64base_s64))) void svst1w_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_s64))) void svst1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64index_u64))) void svst1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_s64))) void svst1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64index_u64))) void svst1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_s64))) void svst1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_s64offset_u64))) void svst1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_s64))) void svst1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_scatter_u64offset_u64))) void svst1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_s64))) void svst1w_vnum(svbool_t, int32_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1w_vnum_u64))) void svst1w_vnum(svbool_t, uint32_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u8))) void svst2(svbool_t, uint8_t *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u32))) void svst2(svbool_t, uint32_t *, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u64))) void svst2(svbool_t, uint64_t *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u16))) void svst2(svbool_t, uint16_t *, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s8))) void 
svst2(svbool_t, int8_t *, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f64))) void svst2(svbool_t, float64_t *, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f32))) void svst2(svbool_t, float32_t *, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f16))) void svst2(svbool_t, float16_t *, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s32))) void svst2(svbool_t, int32_t *, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s64))) void svst2(svbool_t, int64_t *, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s16))) void svst2(svbool_t, int16_t *, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u8))) void svst2_vnum(svbool_t, uint8_t *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u32))) void svst2_vnum(svbool_t, uint32_t *, int64_t, svuint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u64))) void svst2_vnum(svbool_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u16))) void svst2_vnum(svbool_t, uint16_t *, int64_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s8))) void svst2_vnum(svbool_t, int8_t *, int64_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f64))) void svst2_vnum(svbool_t, float64_t *, int64_t, svfloat64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f32))) void svst2_vnum(svbool_t, float32_t *, int64_t, svfloat32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f16))) void svst2_vnum(svbool_t, float16_t *, int64_t, svfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s32))) void 
svst2_vnum(svbool_t, int32_t *, int64_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s64))) void svst2_vnum(svbool_t, int64_t *, int64_t, svint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s16))) void svst2_vnum(svbool_t, int16_t *, int64_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u8))) void svst3(svbool_t, uint8_t *, svuint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u32))) void svst3(svbool_t, uint32_t *, svuint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u64))) void svst3(svbool_t, uint64_t *, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u16))) void svst3(svbool_t, uint16_t *, svuint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s8))) void svst3(svbool_t, int8_t *, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f64))) void svst3(svbool_t, float64_t *, svfloat64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f32))) void svst3(svbool_t, float32_t *, svfloat32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f16))) void svst3(svbool_t, float16_t *, svfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s32))) void svst3(svbool_t, int32_t *, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s64))) void svst3(svbool_t, int64_t *, svint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s16))) void svst3(svbool_t, int16_t *, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u8))) void svst3_vnum(svbool_t, uint8_t *, int64_t, svuint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u32))) void svst3_vnum(svbool_t, uint32_t *, int64_t, svuint32x3_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u64))) void svst3_vnum(svbool_t, uint64_t *, int64_t, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u16))) void svst3_vnum(svbool_t, uint16_t *, int64_t, svuint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s8))) void svst3_vnum(svbool_t, int8_t *, int64_t, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f64))) void svst3_vnum(svbool_t, float64_t *, int64_t, svfloat64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f32))) void svst3_vnum(svbool_t, float32_t *, int64_t, svfloat32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f16))) void svst3_vnum(svbool_t, float16_t *, int64_t, svfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s32))) void svst3_vnum(svbool_t, int32_t *, int64_t, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s64))) void svst3_vnum(svbool_t, int64_t *, int64_t, svint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s16))) void svst3_vnum(svbool_t, int16_t *, int64_t, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u8))) void svst4(svbool_t, uint8_t *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u32))) void svst4(svbool_t, uint32_t *, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u64))) void svst4(svbool_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u16))) void svst4(svbool_t, uint16_t *, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s8))) void svst4(svbool_t, int8_t *, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f64))) void svst4(svbool_t, float64_t *, svfloat64x4_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f32))) void svst4(svbool_t, float32_t *, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f16))) void svst4(svbool_t, float16_t *, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s32))) void svst4(svbool_t, int32_t *, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s64))) void svst4(svbool_t, int64_t *, svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s16))) void svst4(svbool_t, int16_t *, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u8))) void svst4_vnum(svbool_t, uint8_t *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u32))) void svst4_vnum(svbool_t, uint32_t *, int64_t, svuint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u64))) void svst4_vnum(svbool_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u16))) void svst4_vnum(svbool_t, uint16_t *, int64_t, svuint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s8))) void svst4_vnum(svbool_t, int8_t *, int64_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f64))) void svst4_vnum(svbool_t, float64_t *, int64_t, svfloat64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f32))) void svst4_vnum(svbool_t, float32_t *, int64_t, svfloat32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f16))) void svst4_vnum(svbool_t, float16_t *, int64_t, svfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s32))) void svst4_vnum(svbool_t, int32_t *, int64_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s64))) void svst4_vnum(svbool_t, int64_t *, int64_t, 
svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s16))) void svst4_vnum(svbool_t, int16_t *, int64_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8))) void svstnt1(svbool_t, uint8_t *, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32))) void svstnt1(svbool_t, uint32_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64))) void svstnt1(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16))) void svstnt1(svbool_t, uint16_t *, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8))) void svstnt1(svbool_t, int8_t *, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64))) void svstnt1(svbool_t, float64_t *, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32))) void svstnt1(svbool_t, float32_t *, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16))) void svstnt1(svbool_t, float16_t *, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32))) void svstnt1(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64))) void svstnt1(svbool_t, int64_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16))) void svstnt1(svbool_t, int16_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8))) void svstnt1_vnum(svbool_t, uint8_t *, int64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32))) void svstnt1_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64))) void svstnt1_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16))) void svstnt1_vnum(svbool_t, uint16_t *, int64_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8))) void svstnt1_vnum(svbool_t, int8_t *, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64))) void svstnt1_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32))) void svstnt1_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16))) void svstnt1_vnum(svbool_t, float16_t *, int64_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32))) void svstnt1_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64))) void svstnt1_vnum(svbool_t, int64_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16))) void svstnt1_vnum(svbool_t, int16_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_m))) svfloat64_t svsub_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_m))) svfloat32_t svsub_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_m))) svfloat16_t svsub_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_x))) svfloat64_t svsub_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_x))) svfloat32_t svsub_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_x))) svfloat16_t svsub_x(svbool_t, svfloat16_t, float16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_z))) svfloat64_t svsub_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f32_z))) svfloat32_t svsub_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f16_z))) svfloat16_t svsub_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_m))) svuint8_t svsub_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_m))) svuint32_t svsub_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_m))) svuint64_t svsub_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_m))) svuint16_t svsub_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_m))) svint8_t svsub_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_m))) svint32_t svsub_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_m))) svint64_t svsub_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_m))) svint16_t svsub_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_x))) svuint8_t svsub_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_x))) svuint32_t svsub_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_x))) svuint64_t svsub_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_x))) svuint16_t svsub_x(svbool_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_x))) svint8_t svsub_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_x))) svint32_t svsub_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_x))) svint64_t svsub_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_x))) svint16_t svsub_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u8_z))) svuint8_t svsub_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u32_z))) svuint32_t svsub_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u64_z))) svuint64_t svsub_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_u16_z))) svuint16_t svsub_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s8_z))) svint8_t svsub_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s32_z))) svint32_t svsub_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s64_z))) svint64_t svsub_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_s16_z))) svint16_t svsub_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_m))) svfloat64_t svsub_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_m))) svfloat32_t svsub_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_m))) svfloat16_t svsub_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_x))) 
svfloat64_t svsub_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_x))) svfloat32_t svsub_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_x))) svfloat16_t svsub_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f64_z))) svfloat64_t svsub_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f32_z))) svfloat32_t svsub_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_f16_z))) svfloat16_t svsub_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_m))) svuint8_t svsub_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_m))) svuint32_t svsub_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_m))) svuint64_t svsub_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_m))) svuint16_t svsub_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_m))) svint8_t svsub_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_m))) svint32_t svsub_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_m))) svint64_t svsub_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_m))) svint16_t svsub_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_x))) svuint8_t svsub_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_x))) svuint32_t svsub_x(svbool_t, svuint32_t, svuint32_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_x))) svuint64_t svsub_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_x))) svuint16_t svsub_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_x))) svint8_t svsub_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_x))) svint32_t svsub_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_x))) svint64_t svsub_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_x))) svint16_t svsub_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u8_z))) svuint8_t svsub_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u32_z))) svuint32_t svsub_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u64_z))) svuint64_t svsub_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_u16_z))) svuint16_t svsub_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s8_z))) svint8_t svsub_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s32_z))) svint32_t svsub_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s64_z))) svint64_t svsub_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_s16_z))) svint16_t svsub_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_m))) svfloat64_t svsubr_m(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_m))) 
svfloat32_t svsubr_m(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_m))) svfloat16_t svsubr_m(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_x))) svfloat64_t svsubr_x(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_x))) svfloat32_t svsubr_x(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_x))) svfloat16_t svsubr_x(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f64_z))) svfloat64_t svsubr_z(svbool_t, svfloat64_t, float64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f32_z))) svfloat32_t svsubr_z(svbool_t, svfloat32_t, float32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_f16_z))) svfloat16_t svsubr_z(svbool_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_m))) svuint8_t svsubr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_m))) svuint32_t svsubr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_m))) svuint64_t svsubr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_m))) svuint16_t svsubr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_m))) svint8_t svsubr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_m))) svint32_t svsubr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_m))) svint64_t svsubr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_m))) svint16_t 
svsubr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_x))) svuint8_t svsubr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_x))) svuint32_t svsubr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_x))) svuint64_t svsubr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_x))) svuint16_t svsubr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_x))) svint8_t svsubr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_x))) svint32_t svsubr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_x))) svint64_t svsubr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_x))) svint16_t svsubr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u8_z))) svuint8_t svsubr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u32_z))) svuint32_t svsubr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u64_z))) svuint64_t svsubr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_u16_z))) svuint16_t svsubr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s8_z))) svint8_t svsubr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s32_z))) svint32_t svsubr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s64_z))) svint64_t svsubr_z(svbool_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_n_s16_z))) svint16_t svsubr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_m))) svfloat64_t svsubr_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_m))) svfloat32_t svsubr_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_m))) svfloat16_t svsubr_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_x))) svfloat64_t svsubr_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_x))) svfloat32_t svsubr_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_x))) svfloat16_t svsubr_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f64_z))) svfloat64_t svsubr_z(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f32_z))) svfloat32_t svsubr_z(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_f16_z))) svfloat16_t svsubr_z(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_m))) svuint8_t svsubr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_m))) svuint32_t svsubr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_m))) svuint64_t svsubr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_m))) svuint16_t svsubr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_m))) svint8_t svsubr_m(svbool_t, svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_m))) svint32_t svsubr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_m))) svint64_t svsubr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_m))) svint16_t svsubr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_x))) svuint8_t svsubr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_x))) svuint32_t svsubr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_x))) svuint64_t svsubr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_x))) svuint16_t svsubr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_x))) svint8_t svsubr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_x))) svint32_t svsubr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_x))) svint64_t svsubr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_x))) svint16_t svsubr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u8_z))) svuint8_t svsubr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u32_z))) svuint32_t svsubr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u64_z))) svuint64_t svsubr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_u16_z))) svuint16_t svsubr_z(svbool_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s8_z))) svint8_t svsubr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s32_z))) svint32_t svsubr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s64_z))) svint64_t svsubr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubr_s16_z))) svint16_t svsubr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u8))) svuint8_t svtbl(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u32))) svuint32_t svtbl(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u64))) svuint64_t svtbl(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u16))) svuint16_t svtbl(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s8))) svint8_t svtbl(svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f64))) svfloat64_t svtbl(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f32))) svfloat32_t svtbl(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f16))) svfloat16_t svtbl(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s32))) svint32_t svtbl(svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s64))) svint64_t svtbl(svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s16))) svint16_t svtbl(svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f64))) svfloat64_t svtmad(svfloat64_t, svfloat64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f32))) svfloat32_t 
svtmad(svfloat32_t, svfloat32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtmad_f16))) svfloat16_t svtmad(svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u8))) svuint8_t svtrn1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u32))) svuint32_t svtrn1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u64))) svuint64_t svtrn1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u16))) svuint16_t svtrn1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s8))) svint8_t svtrn1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f64))) svfloat64_t svtrn1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f32))) svfloat32_t svtrn1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f16))) svfloat16_t svtrn1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s32))) svint32_t svtrn1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s64))) svint64_t svtrn1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s16))) svint16_t svtrn1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u8))) svuint8_t svtrn2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u32))) svuint32_t svtrn2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u64))) svuint64_t svtrn2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u16))) svuint16_t svtrn2(svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s8))) svint8_t svtrn2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f64))) svfloat64_t svtrn2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f32))) svfloat32_t svtrn2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f16))) svfloat16_t svtrn2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s32))) svint32_t svtrn2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s64))) svint64_t svtrn2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s16))) svint16_t svtrn2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f64))) svfloat64_t svtsmul(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f32))) svfloat32_t svtsmul(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtsmul_f16))) svfloat16_t svtsmul(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f64))) svfloat64_t svtssel(svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f32))) svfloat32_t svtssel(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f16))) svfloat16_t svtssel(svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_b))) svbool_t svunpkhi(svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s32))) svint32_t svunpkhi(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s64))) svint64_t svunpkhi(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_s16))) svint16_t svunpkhi(svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u32))) svuint32_t svunpkhi(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u64))) svuint64_t svunpkhi(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_u16))) svuint16_t svunpkhi(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_b))) svbool_t svunpklo(svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s32))) svint32_t svunpklo(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s64))) svint64_t svunpklo(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_s16))) svint16_t svunpklo(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u32))) svuint32_t svunpklo(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u64))) svuint64_t svunpklo(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpklo_u16))) svuint16_t svunpklo(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u8))) svuint8_t svuzp1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u32))) svuint32_t svuzp1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u64))) svuint64_t svuzp1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u16))) svuint16_t svuzp1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s8))) svint8_t svuzp1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f64))) svfloat64_t svuzp1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f32))) svfloat32_t svuzp1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f16))) svfloat16_t 
svuzp1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s32))) svint32_t svuzp1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s64))) svint64_t svuzp1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s16))) svint16_t svuzp1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u8))) svuint8_t svuzp2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u32))) svuint32_t svuzp2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u64))) svuint64_t svuzp2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u16))) svuint16_t svuzp2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s8))) svint8_t svuzp2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f64))) svfloat64_t svuzp2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f32))) svfloat32_t svuzp2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f16))) svfloat16_t svuzp2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s32))) svint32_t svuzp2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s64))) svint64_t svuzp2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s16))) svint16_t svuzp2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s32))) svbool_t svwhilele_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s32))) svbool_t svwhilele_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s32))) 
svbool_t svwhilele_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s32))) svbool_t svwhilele_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64))) svbool_t svwhilele_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64))) svbool_t svwhilele_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64))) svbool_t svwhilele_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64))) svbool_t svwhilele_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u32))) svbool_t svwhilele_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u32))) svbool_t svwhilele_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u32))) svbool_t svwhilele_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u32))) svbool_t svwhilele_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64))) svbool_t svwhilele_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64))) svbool_t svwhilele_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64))) svbool_t svwhilele_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64))) svbool_t svwhilele_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u32))) svbool_t svwhilelt_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u32))) svbool_t svwhilelt_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u32))) 
svbool_t svwhilelt_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u32))) svbool_t svwhilelt_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64))) svbool_t svwhilelt_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64))) svbool_t svwhilelt_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64))) svbool_t svwhilelt_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64))) svbool_t svwhilelt_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s32))) svbool_t svwhilelt_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s32))) svbool_t svwhilelt_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s32))) svbool_t svwhilelt_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s32))) svbool_t svwhilelt_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64))) svbool_t svwhilelt_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64))) svbool_t svwhilelt_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64))) svbool_t svwhilelt_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64))) svbool_t svwhilelt_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u8))) svuint8_t svzip1(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u32))) svuint32_t svzip1(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u64))) svuint64_t svzip1(svuint64_t, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u16))) svuint16_t svzip1(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s8))) svint8_t svzip1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f64))) svfloat64_t svzip1(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f32))) svfloat32_t svzip1(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f16))) svfloat16_t svzip1(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s32))) svint32_t svzip1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s64))) svint64_t svzip1(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s16))) svint16_t svzip1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u8))) svuint8_t svzip2(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u32))) svuint32_t svzip2(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u64))) svuint64_t svzip2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u16))) svuint16_t svzip2(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s8))) svint8_t svzip2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f64))) svfloat64_t svzip2(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f32))) svfloat32_t svzip2(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f16))) svfloat16_t svzip2(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s32))) svint32_t svzip2(svint32_t, 
svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s64))) svint64_t svzip2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s16))) svint16_t svzip2(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) svfloat32_t svbfdot_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) svfloat32_t svbfdot_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) svfloat32_t svbfmlalb_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) svfloat32_t svbfmlalb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) svfloat32_t svbfmlalb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) svfloat32_t svbfmlalt_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) bfloat16_t svclasta_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) svbfloat16_t 
svclasta_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) bfloat16_t svclastb_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) svbfloat16_t svclastb_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) svuint16_t svcnt_bf16_m(svuint16_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) svuint16_t svcnt_bf16_x(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) svuint16_t svcnt_bf16_z(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) svbfloat16x2_t svcreate2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) svbfloat16x3_t svcreate3_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) svbfloat16x4_t svcreate4_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) svbfloat16_t svcvt_bf16_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) svbfloat16_t svdup_n_bf16(bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) svbfloat16_t svdup_n_bf16_m(svbfloat16_t, svbool_t, 
bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) svbfloat16_t svdup_n_bf16_x(svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) svbfloat16_t svdup_n_bf16_z(svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) svbfloat16_t svdup_lane_bf16(svbfloat16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) svbfloat16_t svdupq_n_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) svbfloat16_t svdupq_lane_bf16(svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) svbfloat16_t svext_bf16(svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) svbfloat16_t svget2_bf16(svbfloat16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) svbfloat16_t svget3_bf16(svbfloat16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) svbfloat16_t svget4_bf16(svbfloat16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) svbfloat16_t svinsr_n_bf16(svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) bfloat16_t svlasta_bf16(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) bfloat16_t svlastb_bf16(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) svbfloat16_t svld1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) svbfloat16_t svld1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) 
svbfloat16_t svld1rq_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) svbfloat16x2_t svld2_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) svbfloat16x2_t svld2_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) svbfloat16x3_t svld3_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) svbfloat16x3_t svld3_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) svbfloat16x4_t svld4_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) svbfloat16x4_t svld4_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) svbfloat16_t svldff1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) svbfloat16_t svldff1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) svbfloat16_t svldnf1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) svbfloat16_t svldnf1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) svbfloat16_t svldnt1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) svbfloat16_t svldnt1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) uint64_t svlen_bf16(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) svbfloat16_t svrev_bf16(svbfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) svbfloat16_t svsel_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) svbfloat16x2_t svset2_bf16(svbfloat16x2_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) svbfloat16x3_t svset3_bf16(svbfloat16x3_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) svbfloat16x4_t svset4_bf16(svbfloat16x4_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) svbfloat16_t svsplice_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) void svst1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) void svst1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) void svst2_bf16(svbool_t, bfloat16_t *, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) void svst2_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) void svst3_bf16(svbool_t, bfloat16_t *, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) void svst3_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) void svst4_bf16(svbool_t, bfloat16_t *, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) void svst4_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) void svstnt1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) void svstnt1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) svbfloat16_t svtbl_bf16(svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) svbfloat16_t svtrn1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) svbfloat16_t svtrn2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_bf16))) svbfloat16x2_t svundef2_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_bf16))) svbfloat16x3_t svundef3_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_bf16))) svbfloat16x4_t svundef4_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_bf16))) svbfloat16_t svundef_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) svbfloat16_t svuzp1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) svbfloat16_t svuzp2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) svbfloat16_t svzip1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) svbfloat16_t svzip2_bf16(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32))) svfloat32_t svbfdot_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32))) svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32))) svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32))) svfloat32_t svbfmlalb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32))) svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) bfloat16_t svclasta(svbool_t, bfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) svbfloat16_t svclasta(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) bfloat16_t svclastb(svbool_t, bfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) svbfloat16_t svclastb(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) svuint16_t svcnt_x(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) svuint16_t svcnt_z(svbool_t, svbfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) svbfloat16x2_t svcreate2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) svbfloat16x3_t svcreate3(svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) svbfloat16x4_t svcreate4(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) svbfloat16_t svcvt_bf16_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) svbfloat16_t svdup_bf16(bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) svbfloat16_t svdup_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) svbfloat16_t svdup_bf16_x(svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) svbfloat16_t svdup_bf16_z(svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) svbfloat16_t svdup_lane(svbfloat16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) svbfloat16_t svdupq_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) svbfloat16_t svdupq_lane(svbfloat16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) svbfloat16_t svext(svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) svbfloat16_t svget2(svbfloat16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) svbfloat16_t svget3(svbfloat16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) svbfloat16_t svget4(svbfloat16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) svbfloat16_t svinsr(svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) bfloat16_t svlasta(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) bfloat16_t svlastb(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) svbfloat16_t svld1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) svbfloat16_t svld1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) svbfloat16_t svld1rq(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) svbfloat16x2_t svld2(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) svbfloat16x2_t svld2_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) svbfloat16x3_t svld3(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) svbfloat16x3_t svld3_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) svbfloat16x4_t svld4(svbool_t, bfloat16_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) svbfloat16x4_t svld4_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) svbfloat16_t svldff1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) svbfloat16_t svldff1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) svbfloat16_t svldnf1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) svbfloat16_t svldnf1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) svbfloat16_t svldnt1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) svbfloat16_t svldnt1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) uint64_t svlen(svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) svbfloat16_t svrev(svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) svbfloat16_t svsel(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) svbfloat16x2_t svset2(svbfloat16x2_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) svbfloat16x3_t svset3(svbfloat16x3_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) svbfloat16x4_t svset4(svbfloat16x4_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) svbfloat16_t svsplice(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) void svst1(svbool_t, bfloat16_t *, svbfloat16_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) void svst1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) void svst2(svbool_t, bfloat16_t *, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) void svst2_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) void svst3(svbool_t, bfloat16_t *, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) void svst3_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) void svst4(svbool_t, bfloat16_t *, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) void svst4_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) void svstnt1(svbool_t, bfloat16_t *, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) void svstnt1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) svbfloat16_t svtbl(svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) svbfloat16_t svtrn1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) svbfloat16_t svtrn2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) svbfloat16_t svuzp1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) svbfloat16_t svuzp2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) svbfloat16_t svzip1(svbfloat16_t, svbfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) svbfloat16_t svzip2(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) svfloat32_t svmmla(svfloat32_t, svfloat32_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) svuint8_t svld1ro_u8(svbool_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) svuint32_t svld1ro_u32(svbool_t, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) svuint64_t svld1ro_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) svuint16_t svld1ro_u16(svbool_t, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) svint8_t svld1ro_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) svfloat64_t svld1ro_f64(svbool_t, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) svfloat32_t svld1ro_f32(svbool_t, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) svfloat16_t svld1ro_f16(svbool_t, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) svint32_t svld1ro_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) svint64_t svld1ro_s64(svbool_t, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) svint16_t svld1ro_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) svfloat64_t svmmla_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) svuint8_t svtrn1q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) svuint32_t svtrn1q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) svuint64_t svtrn1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) svuint16_t svtrn1q_u16(svuint16_t, 
svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) svint8_t svtrn1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) svfloat64_t svtrn1q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) svfloat32_t svtrn1q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) svfloat16_t svtrn1q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) svint32_t svtrn1q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) svint64_t svtrn1q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) svint16_t svtrn1q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) svuint8_t svtrn2q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) svuint32_t svtrn2q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) svuint64_t svtrn2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) svuint16_t svtrn2q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) svint8_t svtrn2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) svfloat64_t svtrn2q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) svfloat32_t svtrn2q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) svfloat16_t svtrn2q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) svint32_t svtrn2q_s32(svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) svint64_t svtrn2q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) svint16_t svtrn2q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) svuint8_t svuzp1q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) svuint32_t svuzp1q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) svuint64_t svuzp1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) svuint16_t svuzp1q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) svint8_t svuzp1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) svfloat64_t svuzp1q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) svfloat32_t svuzp1q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) svfloat16_t svuzp1q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) svint32_t svuzp1q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) svint64_t svuzp1q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) svint16_t svuzp1q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) svuint8_t svuzp2q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) svuint32_t svuzp2q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) svuint64_t svuzp2q_u64(svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) svuint16_t svuzp2q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) svint8_t svuzp2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) svfloat64_t svuzp2q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) svfloat32_t svuzp2q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) svfloat16_t svuzp2q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) svint32_t svuzp2q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) svint64_t svuzp2q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) svint16_t svuzp2q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) svuint8_t svzip1q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) svuint32_t svzip1q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) svuint64_t svzip1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) svuint16_t svzip1q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) svint8_t svzip1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) svfloat64_t svzip1q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) svfloat32_t svzip1q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) svfloat16_t svzip1q_f16(svfloat16_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) svint32_t svzip1q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) svint64_t svzip1q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) svint16_t svzip1q_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) svuint8_t svzip2q_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) svuint32_t svzip2q_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) svuint64_t svzip2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) svuint16_t svzip2q_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) svint8_t svzip2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) svfloat64_t svzip2q_f64(svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) svfloat32_t svzip2q_f32(svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) svfloat16_t svzip2q_f16(svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) svint32_t svzip2q_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) svint64_t svzip2q_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) svint16_t svzip2q_s16(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8))) svuint8_t svld1ro(svbool_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32))) svuint32_t svld1ro(svbool_t, uint32_t const *); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) svuint64_t svld1ro(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) svuint16_t svld1ro(svbool_t, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) svint8_t svld1ro(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) svfloat64_t svld1ro(svbool_t, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32))) svfloat32_t svld1ro(svbool_t, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16))) svfloat16_t svld1ro(svbool_t, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) svint32_t svld1ro(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) svint64_t svld1ro(svbool_t, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) svint16_t svld1ro(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) svfloat64_t svmmla(svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8))) svuint8_t svtrn1q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32))) svuint32_t svtrn1q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) svuint64_t svtrn1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) svuint16_t svtrn1q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) svint8_t svtrn1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) svfloat64_t svtrn1q(svfloat64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32))) svfloat32_t svtrn1q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16))) svfloat16_t svtrn1q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32))) svint32_t svtrn1q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64))) svint64_t svtrn1q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16))) svint16_t svtrn1q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8))) svuint8_t svtrn2q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32))) svuint32_t svtrn2q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) svuint64_t svtrn2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) svuint16_t svtrn2q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) svint8_t svtrn2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) svfloat64_t svtrn2q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32))) svfloat32_t svtrn2q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16))) svfloat16_t svtrn2q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32))) svint32_t svtrn2q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64))) svint64_t svtrn2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16))) svint16_t svtrn2q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8))) svuint8_t 
svuzp1q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32))) svuint32_t svuzp1q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) svuint64_t svuzp1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) svuint16_t svuzp1q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) svint8_t svuzp1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) svfloat64_t svuzp1q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32))) svfloat32_t svuzp1q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16))) svfloat16_t svuzp1q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32))) svint32_t svuzp1q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64))) svint64_t svuzp1q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16))) svint16_t svuzp1q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8))) svuint8_t svuzp2q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32))) svuint32_t svuzp2q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) svuint64_t svuzp2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) svuint16_t svuzp2q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) svint8_t svuzp2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) svfloat64_t svuzp2q(svfloat64_t, svfloat64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32))) svfloat32_t svuzp2q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16))) svfloat16_t svuzp2q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32))) svint32_t svuzp2q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64))) svint64_t svuzp2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16))) svint16_t svuzp2q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8))) svuint8_t svzip1q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32))) svuint32_t svzip1q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) svuint64_t svzip1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) svuint16_t svzip1q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) svint8_t svzip1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) svfloat64_t svzip1q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32))) svfloat32_t svzip1q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16))) svfloat16_t svzip1q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32))) svint32_t svzip1q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64))) svint64_t svzip1q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16))) svint16_t svzip1q(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8))) svuint8_t 
svzip2q(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32))) svuint32_t svzip2q(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) svuint64_t svzip2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) svuint16_t svzip2q(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) svint8_t svzip2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) svfloat64_t svzip2q(svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32))) svfloat32_t svzip2q(svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16))) svfloat16_t svzip2q(svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32))) svint32_t svzip2q(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) svint64_t svzip2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) svint16_t svzip2q(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) svbfloat16_t svld1ro(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) svuint32_t svmmla_u32(svuint32_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) svint32_t svsudot_n_s32(svint32_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) svint32_t svsudot_s32(svint32_t, svint8_t, 
svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) svint32_t svsudot_lane_s32(svint32_t, svint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) svint32_t svusdot_n_s32(svint32_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) svint32_t svusdot_s32(svint32_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) svint32_t svusdot_lane_s32(svint32_t, svuint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) svint32_t svusmmla_s32(svint32_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) svint32_t svmmla(svint32_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) svuint32_t svmmla(svuint32_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32))) svint32_t svsudot(svint32_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32))) svint32_t svsudot(svint32_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32))) svint32_t svsudot_lane(svint32_t, svint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32))) svint32_t svusdot(svint32_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) svint32_t svusdot(svint32_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32))) svint32_t svusmmla(svint32_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8))) svint8_t svaba_n_s8(svint8_t, 
svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32))) svint32_t svaba_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s64))) svint64_t svaba_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s16))) svint16_t svaba_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u8))) svuint8_t svaba_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u32))) svuint32_t svaba_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u64))) svuint64_t svaba_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u16))) svuint16_t svaba_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s8))) svint8_t svaba_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s32))) svint32_t svaba_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s64))) svint64_t svaba_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s16))) svint16_t svaba_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u8))) svuint8_t svaba_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u32))) svuint32_t svaba_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u64))) svuint64_t svaba_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u16))) svuint16_t svaba_u16(svuint16_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s32))) svint32_t svabalb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s64))) svint64_t svabalb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s16))) svint16_t svabalb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u32))) svuint32_t svabalb_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u64))) svuint64_t svabalb_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u16))) svuint16_t svabalb_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s32))) svint32_t svabalb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s64))) svint64_t svabalb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s16))) svint16_t svabalb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u32))) svuint32_t svabalb_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u64))) svuint64_t svabalb_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u16))) svuint16_t svabalb_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s32))) svint32_t svabalt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s64))) svint64_t svabalt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s16))) svint16_t svabalt_n_s16(svint16_t, svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u32))) svuint32_t svabalt_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u64))) svuint64_t svabalt_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u16))) svuint16_t svabalt_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s32))) svint32_t svabalt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s64))) svint64_t svabalt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s16))) svint16_t svabalt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u32))) svuint32_t svabalt_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u64))) svuint64_t svabalt_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u16))) svuint16_t svabalt_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s32))) svint32_t svabdlb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s64))) svint64_t svabdlb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s16))) svint16_t svabdlb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u32))) svuint32_t svabdlb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u64))) svuint64_t svabdlb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u16))) svuint16_t svabdlb_n_u16(svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s32))) svint32_t svabdlb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s64))) svint64_t svabdlb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s16))) svint16_t svabdlb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u32))) svuint32_t svabdlb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u64))) svuint64_t svabdlb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u16))) svuint16_t svabdlb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s32))) svint32_t svabdlt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s64))) svint64_t svabdlt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s16))) svint16_t svabdlt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u32))) svuint32_t svabdlt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u64))) svuint64_t svabdlt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u16))) svuint16_t svabdlt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s32))) svint32_t svabdlt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s64))) svint64_t svabdlt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s16))) svint16_t svabdlt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u32))) svuint32_t svabdlt_u32(svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u64))) svuint64_t svabdlt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u16))) svuint16_t svabdlt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_m))) svint32_t svadalp_s32_m(svbool_t, svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_m))) svint64_t svadalp_s64_m(svbool_t, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_m))) svint16_t svadalp_s16_m(svbool_t, svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_x))) svint32_t svadalp_s32_x(svbool_t, svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_x))) svint64_t svadalp_s64_x(svbool_t, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_x))) svint16_t svadalp_s16_x(svbool_t, svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_z))) svint32_t svadalp_s32_z(svbool_t, svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_z))) svint64_t svadalp_s64_z(svbool_t, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_z))) svint16_t svadalp_s16_z(svbool_t, svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_m))) svuint32_t svadalp_u32_m(svbool_t, svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_m))) svuint64_t svadalp_u64_m(svbool_t, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_m))) svuint16_t svadalp_u16_m(svbool_t, svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_x))) svuint32_t svadalp_u32_x(svbool_t, svuint32_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_x))) svuint64_t svadalp_u64_x(svbool_t, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_x))) svuint16_t svadalp_u16_x(svbool_t, svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_z))) svuint32_t svadalp_u32_z(svbool_t, svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_z))) svuint64_t svadalp_u64_z(svbool_t, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_z))) svuint16_t svadalp_u16_z(svbool_t, svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u32))) svuint32_t svadclb_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u64))) svuint64_t svadclb_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u32))) svuint32_t svadclb_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u64))) svuint64_t svadclb_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u32))) svuint32_t svadclt_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u64))) svuint64_t svadclt_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u32))) svuint32_t svadclt_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u64))) svuint64_t svadclt_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u32))) svuint16_t svaddhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u64))) svuint32_t 
svaddhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u16))) svuint8_t svaddhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s32))) svint16_t svaddhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s64))) svint32_t svaddhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s16))) svint8_t svaddhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u32))) svuint16_t svaddhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u64))) svuint32_t svaddhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u16))) svuint8_t svaddhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s32))) svint16_t svaddhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s64))) svint32_t svaddhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s16))) svint8_t svaddhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u32))) svuint16_t svaddhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u64))) svuint32_t svaddhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u16))) svuint8_t svaddhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s32))) svint16_t svaddhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s64))) svint32_t svaddhnt_n_s64(svint32_t, svint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s16))) svint8_t svaddhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u32))) svuint16_t svaddhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u64))) svuint32_t svaddhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u16))) svuint8_t svaddhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s32))) svint16_t svaddhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s64))) svint32_t svaddhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s16))) svint8_t svaddhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s32))) svint32_t svaddlb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s64))) svint64_t svaddlb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s16))) svint16_t svaddlb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u32))) svuint32_t svaddlb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u64))) svuint64_t svaddlb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u16))) svuint16_t svaddlb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s32))) svint32_t svaddlb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s64))) svint64_t svaddlb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s16))) svint16_t 
svaddlb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u32))) svuint32_t svaddlb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u64))) svuint64_t svaddlb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u16))) svuint16_t svaddlb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s32))) svint32_t svaddlbt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s64))) svint64_t svaddlbt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s16))) svint16_t svaddlbt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s32))) svint32_t svaddlbt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s64))) svint64_t svaddlbt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s16))) svint16_t svaddlbt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s32))) svint32_t svaddlt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s64))) svint64_t svaddlt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s16))) svint16_t svaddlt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u32))) svuint32_t svaddlt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u64))) svuint64_t svaddlt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u16))) svuint16_t svaddlt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s32))) svint32_t svaddlt_s32(svint16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s64))) svint64_t svaddlt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s16))) svint16_t svaddlt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u32))) svuint32_t svaddlt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u64))) svuint64_t svaddlt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u16))) svuint16_t svaddlt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_m))) svfloat64_t svaddp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_m))) svfloat32_t svaddp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_m))) svfloat16_t svaddp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_x))) svfloat64_t svaddp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_x))) svfloat32_t svaddp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_x))) svfloat16_t svaddp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_m))) svuint8_t svaddp_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_m))) svuint32_t svaddp_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_m))) svuint64_t svaddp_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_m))) svuint16_t svaddp_u16_m(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_m))) svint8_t svaddp_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_m))) svint32_t svaddp_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_m))) svint64_t svaddp_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_m))) svint16_t svaddp_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_x))) svuint8_t svaddp_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_x))) svuint32_t svaddp_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_x))) svuint64_t svaddp_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_x))) svuint16_t svaddp_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_x))) svint8_t svaddp_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_x))) svint32_t svaddp_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_x))) svint64_t svaddp_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_x))) svint16_t svaddp_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s32))) svint32_t svaddwb_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s64))) svint64_t svaddwb_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s16))) svint16_t svaddwb_n_s16(svint16_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u32))) svuint32_t svaddwb_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u64))) svuint64_t svaddwb_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u16))) svuint16_t svaddwb_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s32))) svint32_t svaddwb_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s64))) svint64_t svaddwb_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s16))) svint16_t svaddwb_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u32))) svuint32_t svaddwb_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u64))) svuint64_t svaddwb_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u16))) svuint16_t svaddwb_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s32))) svint32_t svaddwt_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s64))) svint64_t svaddwt_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s16))) svint16_t svaddwt_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u32))) svuint32_t svaddwt_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u64))) svuint64_t svaddwt_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u16))) svuint16_t svaddwt_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s32))) svint32_t svaddwt_s32(svint32_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s64))) svint64_t svaddwt_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s16))) svint16_t svaddwt_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u32))) svuint32_t svaddwt_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u64))) svuint64_t svaddwt_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u16))) svuint16_t svaddwt_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u8))) svuint8_t svbcax_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u32))) svuint32_t svbcax_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u64))) svuint64_t svbcax_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u16))) svuint16_t svbcax_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s8))) svint8_t svbcax_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s32))) svint32_t svbcax_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s64))) svint64_t svbcax_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s16))) svint16_t svbcax_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u8))) svuint8_t svbcax_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u32))) svuint32_t svbcax_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u64))) svuint64_t 
svbcax_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u16))) svuint16_t svbcax_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s8))) svint8_t svbcax_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s32))) svint32_t svbcax_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s64))) svint64_t svbcax_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s16))) svint16_t svbcax_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u8))) svuint8_t svbsl1n_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u32))) svuint32_t svbsl1n_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u64))) svuint64_t svbsl1n_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u16))) svuint16_t svbsl1n_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s8))) svint8_t svbsl1n_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s32))) svint32_t svbsl1n_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s64))) svint64_t svbsl1n_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s16))) svint16_t svbsl1n_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u8))) svuint8_t svbsl1n_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u32))) svuint32_t svbsl1n_u32(svuint32_t, 
svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u64))) svuint64_t svbsl1n_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u16))) svuint16_t svbsl1n_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s8))) svint8_t svbsl1n_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s32))) svint32_t svbsl1n_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s64))) svint64_t svbsl1n_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s16))) svint16_t svbsl1n_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u8))) svuint8_t svbsl2n_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u32))) svuint32_t svbsl2n_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u64))) svuint64_t svbsl2n_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u16))) svuint16_t svbsl2n_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s8))) svint8_t svbsl2n_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s32))) svint32_t svbsl2n_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s64))) svint64_t svbsl2n_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s16))) svint16_t svbsl2n_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u8))) svuint8_t svbsl2n_u8(svuint8_t, svuint8_t, 
svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u32))) svuint32_t svbsl2n_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u64))) svuint64_t svbsl2n_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u16))) svuint16_t svbsl2n_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s8))) svint8_t svbsl2n_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s32))) svint32_t svbsl2n_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s64))) svint64_t svbsl2n_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s16))) svint16_t svbsl2n_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u8))) svuint8_t svbsl_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u32))) svuint32_t svbsl_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u64))) svuint64_t svbsl_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u16))) svuint16_t svbsl_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s8))) svint8_t svbsl_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s32))) svint32_t svbsl_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s64))) svint64_t svbsl_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s16))) svint16_t svbsl_n_s16(svint16_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u8))) svuint8_t svbsl_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u32))) svuint32_t svbsl_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u64))) svuint64_t svbsl_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u16))) svuint16_t svbsl_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s8))) svint8_t svbsl_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s32))) svint32_t svbsl_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s64))) svint64_t svbsl_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s16))) svint16_t svbsl_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u8))) svuint8_t svcadd_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u32))) svuint32_t svcadd_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u64))) svuint64_t svcadd_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u16))) svuint16_t svcadd_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s8))) svint8_t svcadd_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s32))) svint32_t svcadd_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s64))) svint64_t svcadd_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s16))) svint16_t 
svcadd_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s32))) svint32_t svcdot_s32(svint32_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s64))) svint64_t svcdot_s64(svint64_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s32))) svint32_t svcdot_lane_s32(svint32_t, svint8_t, svint8_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s64))) svint64_t svcdot_lane_s64(svint64_t, svint16_t, svint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u8))) svuint8_t svcmla_u8(svuint8_t, svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u32))) svuint32_t svcmla_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u64))) svuint64_t svcmla_u64(svuint64_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u16))) svuint16_t svcmla_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s8))) svint8_t svcmla_s8(svint8_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s32))) svint32_t svcmla_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s64))) svint64_t svcmla_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s16))) svint16_t svcmla_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u32))) svuint32_t svcmla_lane_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u16))) 
svuint16_t svcmla_lane_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s32))) svint32_t svcmla_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s16))) svint16_t svcmla_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_m))) svfloat32_t svcvtlt_f32_f16_m(svfloat32_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_x))) svfloat32_t svcvtlt_f32_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_m))) svfloat64_t svcvtlt_f64_f32_m(svfloat64_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_x))) svfloat64_t svcvtlt_f64_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_m))) svfloat16_t svcvtnt_f16_f32_m(svfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_m))) svfloat32_t svcvtnt_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_m))) svfloat32_t svcvtx_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_x))) svfloat32_t svcvtx_f32_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_z))) svfloat32_t svcvtx_f32_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_m))) svfloat32_t svcvtxnt_f32_f64_m(svfloat32_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u8))) svuint8_t sveor3_n_u8(svuint8_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u32))) svuint32_t sveor3_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u64))) svuint64_t sveor3_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u16))) svuint16_t sveor3_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s8))) svint8_t sveor3_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s32))) svint32_t sveor3_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s64))) svint64_t sveor3_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s16))) svint16_t sveor3_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u8))) svuint8_t sveor3_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u32))) svuint32_t sveor3_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u64))) svuint64_t sveor3_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u16))) svuint16_t sveor3_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s8))) svint8_t sveor3_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s32))) svint32_t sveor3_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s64))) svint64_t sveor3_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s16))) svint16_t sveor3_s16(svint16_t, svint16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u8))) svuint8_t sveorbt_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u32))) svuint32_t sveorbt_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u64))) svuint64_t sveorbt_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u16))) svuint16_t sveorbt_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s8))) svint8_t sveorbt_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s32))) svint32_t sveorbt_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s64))) svint64_t sveorbt_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s16))) svint16_t sveorbt_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u8))) svuint8_t sveorbt_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u32))) svuint32_t sveorbt_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u64))) svuint64_t sveorbt_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u16))) svuint16_t sveorbt_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s8))) svint8_t sveorbt_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s32))) svint32_t sveorbt_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s64))) svint64_t sveorbt_s64(svint64_t, svint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s16))) svint16_t sveorbt_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u8))) svuint8_t sveortb_n_u8(svuint8_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u32))) svuint32_t sveortb_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u64))) svuint64_t sveortb_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u16))) svuint16_t sveortb_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s8))) svint8_t sveortb_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s32))) svint32_t sveortb_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s64))) svint64_t sveortb_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s16))) svint16_t sveortb_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u8))) svuint8_t sveortb_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u32))) svuint32_t sveortb_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u64))) svuint64_t sveortb_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u16))) svuint16_t sveortb_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s8))) svint8_t sveortb_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s32))) svint32_t sveortb_s32(svint32_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s64))) svint64_t sveortb_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s16))) svint16_t sveortb_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_m))) svint8_t svhadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_m))) svint32_t svhadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_m))) svint64_t svhadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_m))) svint16_t svhadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_x))) svint8_t svhadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_x))) svint32_t svhadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_x))) svint64_t svhadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_x))) svint16_t svhadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_z))) svint8_t svhadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_z))) svint32_t svhadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_z))) svint64_t svhadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_z))) svint16_t svhadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_m))) svuint8_t svhadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_m))) svuint32_t svhadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_m))) svuint64_t svhadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_m))) svuint16_t svhadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_x))) svuint8_t svhadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_x))) svuint32_t svhadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_x))) svuint64_t svhadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_x))) svuint16_t svhadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_z))) svuint8_t svhadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_z))) svuint32_t svhadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_z))) svuint64_t svhadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_z))) svuint16_t svhadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_m))) svint8_t svhadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_m))) svint32_t svhadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_m))) svint64_t svhadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_m))) svint16_t svhadd_s16_m(svbool_t, 
svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_x))) svint8_t svhadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_x))) svint32_t svhadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_x))) svint64_t svhadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_x))) svint16_t svhadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_z))) svint8_t svhadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_z))) svint32_t svhadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_z))) svint64_t svhadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_z))) svint16_t svhadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_m))) svuint8_t svhadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_m))) svuint32_t svhadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_m))) svuint64_t svhadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_m))) svuint16_t svhadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_x))) svuint8_t svhadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_x))) svuint32_t svhadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_x))) svuint64_t svhadd_u64_x(svbool_t, svuint64_t, 
svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_x))) svuint16_t svhadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_z))) svuint8_t svhadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_z))) svuint32_t svhadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_z))) svuint64_t svhadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_z))) svuint16_t svhadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) svuint64_t svhistcnt_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) svuint32_t svhistcnt_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) svuint64_t svhistcnt_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) svuint8_t svhistseg_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) svuint8_t svhistseg_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_m))) svint8_t svhsub_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_m))) svint32_t svhsub_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_m))) svint64_t svhsub_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_m))) svint16_t svhsub_n_s16_m(svbool_t, 
svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_x))) svint8_t svhsub_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_x))) svint32_t svhsub_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_x))) svint64_t svhsub_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_x))) svint16_t svhsub_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_z))) svint8_t svhsub_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_z))) svint32_t svhsub_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_z))) svint64_t svhsub_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_z))) svint16_t svhsub_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_m))) svuint8_t svhsub_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_m))) svuint32_t svhsub_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_m))) svuint64_t svhsub_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_m))) svuint16_t svhsub_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_x))) svuint8_t svhsub_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_x))) svuint32_t svhsub_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_x))) svuint64_t 
svhsub_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_x))) svuint16_t svhsub_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_z))) svuint8_t svhsub_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_z))) svuint32_t svhsub_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_z))) svuint64_t svhsub_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_z))) svuint16_t svhsub_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_m))) svint8_t svhsub_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_m))) svint32_t svhsub_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_m))) svint64_t svhsub_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_m))) svint16_t svhsub_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_x))) svint8_t svhsub_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_x))) svint32_t svhsub_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_x))) svint64_t svhsub_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_x))) svint16_t svhsub_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_z))) svint8_t svhsub_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_z))) svint32_t 
svhsub_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_z))) svint64_t svhsub_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_z))) svint16_t svhsub_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_m))) svuint8_t svhsub_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_m))) svuint32_t svhsub_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_m))) svuint64_t svhsub_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_m))) svuint16_t svhsub_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_x))) svuint8_t svhsub_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_x))) svuint32_t svhsub_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_x))) svuint64_t svhsub_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_x))) svuint16_t svhsub_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_z))) svuint8_t svhsub_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_z))) svuint32_t svhsub_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_z))) svuint64_t svhsub_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_z))) svuint16_t svhsub_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_m))) 
svint8_t svhsubr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_m))) svint32_t svhsubr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_m))) svint64_t svhsubr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_m))) svint16_t svhsubr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_x))) svint8_t svhsubr_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_x))) svint32_t svhsubr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_x))) svint64_t svhsubr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_x))) svint16_t svhsubr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_z))) svint8_t svhsubr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_z))) svint32_t svhsubr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_z))) svint64_t svhsubr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_z))) svint16_t svhsubr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_m))) svuint8_t svhsubr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_m))) svuint32_t svhsubr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_m))) svuint64_t svhsubr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_m))) svuint16_t svhsubr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_x))) svuint8_t svhsubr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_x))) svuint32_t svhsubr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_x))) svuint64_t svhsubr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_x))) svuint16_t svhsubr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_z))) svuint8_t svhsubr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_z))) svuint32_t svhsubr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_z))) svuint64_t svhsubr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_z))) svuint16_t svhsubr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_m))) svint8_t svhsubr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_m))) svint32_t svhsubr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_m))) svint64_t svhsubr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_m))) svint16_t svhsubr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_x))) svint8_t svhsubr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_x))) svint32_t 
svhsubr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_x))) svint64_t svhsubr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_x))) svint16_t svhsubr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_z))) svint8_t svhsubr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_z))) svint32_t svhsubr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_z))) svint64_t svhsubr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_z))) svint16_t svhsubr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_m))) svuint8_t svhsubr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_m))) svuint32_t svhsubr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_m))) svuint64_t svhsubr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_m))) svuint16_t svhsubr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_x))) svuint8_t svhsubr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_x))) svuint32_t svhsubr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_x))) svuint64_t svhsubr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_x))) svuint16_t svhsubr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_z))) svuint8_t svhsubr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_z))) svuint32_t svhsubr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_z))) svuint64_t svhsubr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_z))) svuint16_t svhsubr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) svuint32_t svldnt1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) svuint64_t svldnt1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) svint32_t svldnt1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) svint64_t svldnt1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) svint32_t svldnt1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) svint64_t svldnt1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) svuint32_t svldnt1_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) svuint64_t svldnt1_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) svfloat64_t svldnt1_gather_u64base_f64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) svfloat32_t svldnt1_gather_u32base_f32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) svint32_t svldnt1_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) svint64_t svldnt1_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) svuint64_t svldnt1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) svfloat64_t svldnt1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) svint64_t svldnt1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) svuint64_t svldnt1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) svfloat64_t svldnt1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) svint64_t svldnt1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) svuint32_t svldnt1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) svfloat32_t svldnt1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) svint32_t svldnt1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) svuint64_t svldnt1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) svfloat64_t svldnt1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) svint64_t svldnt1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) svuint64_t svldnt1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) svfloat64_t 
svldnt1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) svint64_t svldnt1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) svuint32_t svldnt1sb_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) svuint64_t svldnt1sb_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) svint32_t svldnt1sb_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) svint64_t svldnt1sb_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) svint32_t svldnt1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) svint64_t svldnt1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) svint64_t svldnt1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) svuint32_t svldnt1sh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) svuint64_t svldnt1sh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) svint32_t svldnt1sh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) svint64_t svldnt1sh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) svuint64_t svldnt1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) svint64_t svldnt1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) svuint64_t svldnt1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) svint64_t svldnt1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) svuint32_t svldnt1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) svint32_t svldnt1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) svuint64_t svldnt1sh_gather_s64offset_u64(svbool_t, int16_t const *, 
svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) svint64_t svldnt1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) svint64_t svldnt1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) svuint64_t svldnt1sw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) svint64_t svldnt1sw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) svuint64_t svldnt1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) svint64_t svldnt1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) 
svuint64_t svldnt1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) svint64_t svldnt1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) svint64_t svldnt1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) svint64_t svldnt1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) svuint32_t svldnt1ub_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) svuint64_t svldnt1ub_gather_u64base_u64(svbool_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) svint32_t svldnt1ub_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) svint64_t svldnt1ub_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) svint32_t svldnt1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) svint64_t svldnt1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) svint64_t svldnt1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) svuint32_t svldnt1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) svint32_t svldnt1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) svint64_t 
svldnt1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) svuint32_t svldnt1uh_gather_u32base_u32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) svuint64_t svldnt1uh_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) svint32_t svldnt1uh_gather_u32base_s32(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) svint64_t svldnt1uh_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) svuint64_t svldnt1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) svint64_t svldnt1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) svuint64_t svldnt1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) svint64_t svldnt1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) svint32_t svldnt1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) svint64_t svldnt1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) svint64_t svldnt1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) svuint64_t svldnt1uw_gather_u64base_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) svint64_t svldnt1uw_gather_u64base_s64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) svuint64_t svldnt1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) svint64_t svldnt1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) svuint64_t svldnt1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) svint64_t svldnt1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) svint64_t svldnt1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) svint64_t svldnt1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_m))) svint64_t svlogb_f64_m(svint64_t, svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_m))) svint32_t svlogb_f32_m(svint32_t, svbool_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_m))) svint16_t svlogb_f16_m(svint16_t, svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_x))) svint64_t svlogb_f64_x(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_x))) svint32_t svlogb_f32_x(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_x))) svint16_t svlogb_f16_x(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_z))) svint64_t svlogb_f64_z(svbool_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_z))) svint32_t svlogb_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_z))) svint16_t svlogb_f16_z(svbool_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) svbool_t svmatch_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) svbool_t svmatch_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) svbool_t svmatch_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) svbool_t svmatch_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_m))) svfloat64_t svmaxnmp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_m))) svfloat32_t svmaxnmp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_m))) svfloat16_t svmaxnmp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_x))) svfloat64_t svmaxnmp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_x))) svfloat32_t svmaxnmp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_x))) svfloat16_t svmaxnmp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_m))) svfloat64_t svmaxp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_m))) svfloat32_t svmaxp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_m))) svfloat16_t svmaxp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_x))) svfloat64_t svmaxp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_x))) svfloat32_t svmaxp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_x))) svfloat16_t svmaxp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_m))) svint8_t svmaxp_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_m))) svint32_t svmaxp_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_m))) svint64_t svmaxp_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_m))) svint16_t svmaxp_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_x))) svint8_t svmaxp_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_x))) svint32_t svmaxp_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_x))) svint64_t svmaxp_s64_x(svbool_t, 
svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_x))) svint16_t svmaxp_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_m))) svuint8_t svmaxp_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_m))) svuint32_t svmaxp_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_m))) svuint64_t svmaxp_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_m))) svuint16_t svmaxp_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_x))) svuint8_t svmaxp_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_x))) svuint32_t svmaxp_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_x))) svuint64_t svmaxp_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_x))) svuint16_t svmaxp_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_m))) svfloat64_t svminnmp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_m))) svfloat32_t svminnmp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_m))) svfloat16_t svminnmp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_x))) svfloat64_t svminnmp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_x))) svfloat32_t svminnmp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_x))) svfloat16_t svminnmp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_m))) svfloat64_t svminp_f64_m(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_m))) svfloat32_t svminp_f32_m(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_m))) svfloat16_t svminp_f16_m(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_x))) svfloat64_t svminp_f64_x(svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_x))) svfloat32_t svminp_f32_x(svbool_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_x))) svfloat16_t svminp_f16_x(svbool_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_m))) svint8_t svminp_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_m))) svint32_t svminp_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_m))) svint64_t svminp_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_m))) svint16_t svminp_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_x))) svint8_t svminp_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_x))) svint32_t svminp_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_x))) svint64_t svminp_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_x))) svint16_t svminp_s16_x(svbool_t, svint16_t, 
svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_m))) svuint8_t svminp_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_m))) svuint32_t svminp_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_m))) svuint64_t svminp_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_m))) svuint16_t svminp_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_x))) svuint8_t svminp_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_x))) svuint32_t svminp_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_x))) svuint64_t svminp_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_x))) svuint16_t svminp_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u32))) svuint32_t svmla_lane_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u64))) svuint64_t svmla_lane_u64(svuint64_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u16))) svuint16_t svmla_lane_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s32))) svint32_t svmla_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s64))) svint64_t svmla_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s16))) svint16_t svmla_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f32))) svfloat32_t svmlalb_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s32))) svint32_t svmlalb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s64))) svint64_t svmlalb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s16))) svint16_t svmlalb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u32))) svuint32_t svmlalb_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u64))) svuint64_t svmlalb_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u16))) svuint16_t svmlalb_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f32))) svfloat32_t svmlalb_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s32))) svint32_t svmlalb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s64))) svint64_t svmlalb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s16))) svint16_t svmlalb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u32))) svuint32_t svmlalb_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u64))) svuint64_t svmlalb_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u16))) svuint16_t svmlalb_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f32))) svfloat32_t svmlalb_lane_f32(svfloat32_t, 
svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s32))) svint32_t svmlalb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s64))) svint64_t svmlalb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u32))) svuint32_t svmlalb_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u64))) svuint64_t svmlalb_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f32))) svfloat32_t svmlalt_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s32))) svint32_t svmlalt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s64))) svint64_t svmlalt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s16))) svint16_t svmlalt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u32))) svuint32_t svmlalt_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u64))) svuint64_t svmlalt_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u16))) svuint16_t svmlalt_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f32))) svfloat32_t svmlalt_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s32))) svint32_t svmlalt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s64))) svint64_t svmlalt_s64(svint64_t, svint32_t, svint32_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s16))) svint16_t svmlalt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u32))) svuint32_t svmlalt_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u64))) svuint64_t svmlalt_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u16))) svuint16_t svmlalt_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f32))) svfloat32_t svmlalt_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s32))) svint32_t svmlalt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s64))) svint64_t svmlalt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u32))) svuint32_t svmlalt_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u64))) svuint64_t svmlalt_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u32))) svuint32_t svmls_lane_u32(svuint32_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u64))) svuint64_t svmls_lane_u64(svuint64_t, svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u16))) svuint16_t svmls_lane_u16(svuint16_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s32))) svint32_t svmls_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s64))) svint64_t 
svmls_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s16))) svint16_t svmls_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_f32))) svfloat32_t svmlslb_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s32))) svint32_t svmlslb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s64))) svint64_t svmlslb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s16))) svint16_t svmlslb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u32))) svuint32_t svmlslb_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u64))) svuint64_t svmlslb_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u16))) svuint16_t svmlslb_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_f32))) svfloat32_t svmlslb_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s32))) svint32_t svmlslb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s64))) svint64_t svmlslb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s16))) svint16_t svmlslb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u32))) svuint32_t svmlslb_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u64))) svuint64_t svmlslb_u64(svuint64_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u16))) svuint16_t svmlslb_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_f32))) svfloat32_t svmlslb_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s32))) svint32_t svmlslb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s64))) svint64_t svmlslb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u32))) svuint32_t svmlslb_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u64))) svuint64_t svmlslb_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_f32))) svfloat32_t svmlslt_n_f32(svfloat32_t, svfloat16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s32))) svint32_t svmlslt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s64))) svint64_t svmlslt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s16))) svint16_t svmlslt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u32))) svuint32_t svmlslt_n_u32(svuint32_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u64))) svuint64_t svmlslt_n_u64(svuint64_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u16))) svuint16_t svmlslt_n_u16(svuint16_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_f32))) svfloat32_t svmlslt_f32(svfloat32_t, svfloat16_t, svfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s32))) svint32_t svmlslt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s64))) svint64_t svmlslt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s16))) svint16_t svmlslt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u32))) svuint32_t svmlslt_u32(svuint32_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u64))) svuint64_t svmlslt_u64(svuint64_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u16))) svuint16_t svmlslt_u16(svuint16_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_f32))) svfloat32_t svmlslt_lane_f32(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s32))) svint32_t svmlslt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s64))) svint64_t svmlslt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u32))) svuint32_t svmlslt_lane_u32(svuint32_t, svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u64))) svuint64_t svmlslt_lane_u64(svuint64_t, svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s32))) svint32_t svmovlb_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s64))) svint64_t svmovlb_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s16))) svint16_t svmovlb_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u32))) svuint32_t svmovlb_u32(svuint16_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u64))) svuint64_t svmovlb_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u16))) svuint16_t svmovlb_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s32))) svint32_t svmovlt_s32(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s64))) svint64_t svmovlt_s64(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s16))) svint16_t svmovlt_s16(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u32))) svuint32_t svmovlt_u32(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u64))) svuint64_t svmovlt_u64(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u16))) svuint16_t svmovlt_u16(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u32))) svuint32_t svmul_lane_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u64))) svuint64_t svmul_lane_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u16))) svuint16_t svmul_lane_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s32))) svint32_t svmul_lane_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s64))) svint64_t svmul_lane_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s16))) svint16_t svmul_lane_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s32))) svint32_t svmullb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s64))) svint64_t svmullb_n_s64(svint32_t, int32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s16))) svint16_t svmullb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u32))) svuint32_t svmullb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u64))) svuint64_t svmullb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u16))) svuint16_t svmullb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s32))) svint32_t svmullb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s64))) svint64_t svmullb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s16))) svint16_t svmullb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u32))) svuint32_t svmullb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u64))) svuint64_t svmullb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u16))) svuint16_t svmullb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s32))) svint32_t svmullb_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s64))) svint64_t svmullb_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u32))) svuint32_t svmullb_lane_u32(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u64))) svuint64_t svmullb_lane_u64(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s32))) svint32_t svmullt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s64))) svint64_t 
svmullt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s16))) svint16_t svmullt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u32))) svuint32_t svmullt_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u64))) svuint64_t svmullt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u16))) svuint16_t svmullt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s32))) svint32_t svmullt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s64))) svint64_t svmullt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s16))) svint16_t svmullt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u32))) svuint32_t svmullt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u64))) svuint64_t svmullt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u16))) svuint16_t svmullt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s32))) svint32_t svmullt_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s64))) svint64_t svmullt_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u32))) svuint32_t svmullt_lane_u32(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u64))) svuint64_t svmullt_lane_u64(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u8))) svuint8_t svnbsl_n_u8(svuint8_t, svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u32))) svuint32_t svnbsl_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u64))) svuint64_t svnbsl_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u16))) svuint16_t svnbsl_n_u16(svuint16_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s8))) svint8_t svnbsl_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s32))) svint32_t svnbsl_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s64))) svint64_t svnbsl_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s16))) svint16_t svnbsl_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u8))) svuint8_t svnbsl_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u32))) svuint32_t svnbsl_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u64))) svuint64_t svnbsl_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u16))) svuint16_t svnbsl_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s8))) svint8_t svnbsl_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s32))) svint32_t svnbsl_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s64))) svint64_t svnbsl_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s16))) svint16_t svnbsl_s16(svint16_t, svint16_t, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) svbool_t svnmatch_u8(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) svbool_t svnmatch_u16(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) svbool_t svnmatch_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) svbool_t svnmatch_s16(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_n_u8))) svuint8_t svpmul_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_u8))) svuint8_t svpmul_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u64))) svuint64_t svpmullb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u16))) svuint16_t svpmullb_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u64))) svuint64_t svpmullb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u16))) svuint16_t svpmullb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u8))) svuint8_t svpmullb_pair_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u32))) svuint32_t svpmullb_pair_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u8))) svuint8_t svpmullb_pair_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u32))) svuint32_t svpmullb_pair_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u64))) svuint64_t svpmullt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u16))) svuint16_t 
svpmullt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u64))) svuint64_t svpmullt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u16))) svuint16_t svpmullt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u8))) svuint8_t svpmullt_pair_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u32))) svuint32_t svpmullt_pair_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u8))) svuint8_t svpmullt_pair_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u32))) svuint32_t svpmullt_pair_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_m))) svint8_t svqabs_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_m))) svint32_t svqabs_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_m))) svint64_t svqabs_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_m))) svint16_t svqabs_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_x))) svint8_t svqabs_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_x))) svint32_t svqabs_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_x))) svint64_t svqabs_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_x))) svint16_t svqabs_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_z))) svint8_t svqabs_s8_z(svbool_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_z))) svint32_t svqabs_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_z))) svint64_t svqabs_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_z))) svint16_t svqabs_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_m))) svint8_t svqadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_m))) svint32_t svqadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_m))) svint64_t svqadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_m))) svint16_t svqadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_x))) svint8_t svqadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_x))) svint32_t svqadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_x))) svint64_t svqadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_x))) svint16_t svqadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_z))) svint8_t svqadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_z))) svint32_t svqadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_z))) svint64_t svqadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_z))) svint16_t svqadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_m))) svuint8_t svqadd_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_m))) svuint32_t svqadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_m))) svuint64_t svqadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_m))) svuint16_t svqadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_x))) svuint8_t svqadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_x))) svuint32_t svqadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_x))) svuint64_t svqadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_x))) svuint16_t svqadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_z))) svuint8_t svqadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_z))) svuint32_t svqadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_z))) svuint64_t svqadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_z))) svuint16_t svqadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_m))) svint8_t svqadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_m))) svint32_t svqadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_m))) svint64_t svqadd_s64_m(svbool_t, 
svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_m))) svint16_t svqadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_x))) svint8_t svqadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_x))) svint32_t svqadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_x))) svint64_t svqadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_x))) svint16_t svqadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_z))) svint8_t svqadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_z))) svint32_t svqadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_z))) svint64_t svqadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_z))) svint16_t svqadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_m))) svuint8_t svqadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_m))) svuint32_t svqadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_m))) svuint64_t svqadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_m))) svuint16_t svqadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_x))) svuint8_t svqadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_x))) svuint32_t svqadd_u32_x(svbool_t, svuint32_t, 
svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_x))) svuint64_t svqadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_x))) svuint16_t svqadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_z))) svuint8_t svqadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_z))) svuint32_t svqadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_z))) svuint64_t svqadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_z))) svuint16_t svqadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s8))) svint8_t svqcadd_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s32))) svint32_t svqcadd_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s64))) svint64_t svqcadd_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s16))) svint16_t svqcadd_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s32))) svint32_t svqdmlalb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s64))) svint64_t svqdmlalb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s16))) svint16_t svqdmlalb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s32))) svint32_t svqdmlalb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s64))) svint64_t svqdmlalb_s64(svint64_t, svint32_t, 
svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s16))) svint16_t svqdmlalb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s32))) svint32_t svqdmlalb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s64))) svint64_t svqdmlalb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s32))) svint32_t svqdmlalbt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s64))) svint64_t svqdmlalbt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s16))) svint16_t svqdmlalbt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s32))) svint32_t svqdmlalbt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s64))) svint64_t svqdmlalbt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s16))) svint16_t svqdmlalbt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s32))) svint32_t svqdmlalt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s64))) svint64_t svqdmlalt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s16))) svint16_t svqdmlalt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s32))) svint32_t svqdmlalt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s64))) svint64_t svqdmlalt_s64(svint64_t, svint32_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s16))) svint16_t svqdmlalt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s32))) svint32_t svqdmlalt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s64))) svint64_t svqdmlalt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s32))) svint32_t svqdmlslb_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s64))) svint64_t svqdmlslb_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s16))) svint16_t svqdmlslb_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s32))) svint32_t svqdmlslb_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s64))) svint64_t svqdmlslb_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s16))) svint16_t svqdmlslb_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s32))) svint32_t svqdmlslb_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s64))) svint64_t svqdmlslb_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s32))) svint32_t svqdmlslbt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s64))) svint64_t svqdmlslbt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s16))) svint16_t svqdmlslbt_n_s16(svint16_t, svint8_t, int8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s32))) svint32_t svqdmlslbt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s64))) svint64_t svqdmlslbt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s16))) svint16_t svqdmlslbt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s32))) svint32_t svqdmlslt_n_s32(svint32_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s64))) svint64_t svqdmlslt_n_s64(svint64_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s16))) svint16_t svqdmlslt_n_s16(svint16_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s32))) svint32_t svqdmlslt_s32(svint32_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s64))) svint64_t svqdmlslt_s64(svint64_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s16))) svint16_t svqdmlslt_s16(svint16_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s32))) svint32_t svqdmlslt_lane_s32(svint32_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s64))) svint64_t svqdmlslt_lane_s64(svint64_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s8))) svint8_t svqdmulh_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s32))) svint32_t svqdmulh_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s64))) svint64_t svqdmulh_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s16))) svint16_t 
svqdmulh_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8))) svint8_t svqdmulh_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32))) svint32_t svqdmulh_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64))) svint64_t svqdmulh_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16))) svint16_t svqdmulh_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s32))) svint32_t svqdmulh_lane_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s64))) svint64_t svqdmulh_lane_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s16))) svint16_t svqdmulh_lane_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s32))) svint32_t svqdmullb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s64))) svint64_t svqdmullb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s16))) svint16_t svqdmullb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s32))) svint32_t svqdmullb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s64))) svint64_t svqdmullb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s16))) svint16_t svqdmullb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s32))) svint32_t svqdmullb_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s64))) svint64_t svqdmullb_lane_s64(svint32_t, svint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s32))) svint32_t svqdmullt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s64))) svint64_t svqdmullt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s16))) svint16_t svqdmullt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s32))) svint32_t svqdmullt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s64))) svint64_t svqdmullt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s16))) svint16_t svqdmullt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s32))) svint32_t svqdmullt_lane_s32(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s64))) svint64_t svqdmullt_lane_s64(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_m))) svint8_t svqneg_s8_m(svint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_m))) svint32_t svqneg_s32_m(svint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_m))) svint64_t svqneg_s64_m(svint64_t, svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_m))) svint16_t svqneg_s16_m(svint16_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_x))) svint8_t svqneg_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_x))) svint32_t svqneg_s32_x(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_x))) svint64_t svqneg_s64_x(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_x))) svint16_t 
svqneg_s16_x(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_z))) svint8_t svqneg_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_z))) svint32_t svqneg_s32_z(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_z))) svint64_t svqneg_s64_z(svbool_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_z))) svint16_t svqneg_s16_z(svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s8))) svint8_t svqrdcmlah_s8(svint8_t, svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s32))) svint32_t svqrdcmlah_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s64))) svint64_t svqrdcmlah_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s16))) svint16_t svqrdcmlah_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s32))) svint32_t svqrdcmlah_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s16))) svint16_t svqrdcmlah_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s8))) svint8_t svqrdmlah_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s32))) svint32_t svqrdmlah_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s64))) svint64_t svqrdmlah_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s16))) svint16_t svqrdmlah_n_s16(svint16_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s8))) svint8_t svqrdmlah_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s32))) svint32_t svqrdmlah_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s64))) svint64_t svqrdmlah_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s16))) svint16_t svqrdmlah_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s32))) svint32_t svqrdmlah_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s64))) svint64_t svqrdmlah_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s16))) svint16_t svqrdmlah_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s8))) svint8_t svqrdmlsh_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s32))) svint32_t svqrdmlsh_n_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s64))) svint64_t svqrdmlsh_n_s64(svint64_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s16))) svint16_t svqrdmlsh_n_s16(svint16_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s8))) svint8_t svqrdmlsh_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s32))) svint32_t svqrdmlsh_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s64))) svint64_t svqrdmlsh_s64(svint64_t, svint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s16))) svint16_t svqrdmlsh_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s32))) svint32_t svqrdmlsh_lane_s32(svint32_t, svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s64))) svint64_t svqrdmlsh_lane_s64(svint64_t, svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s16))) svint16_t svqrdmlsh_lane_s16(svint16_t, svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s8))) svint8_t svqrdmulh_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s32))) svint32_t svqrdmulh_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s64))) svint64_t svqrdmulh_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s16))) svint16_t svqrdmulh_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s8))) svint8_t svqrdmulh_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s32))) svint32_t svqrdmulh_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s64))) svint64_t svqrdmulh_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s16))) svint16_t svqrdmulh_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s32))) svint32_t svqrdmulh_lane_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s64))) svint64_t svqrdmulh_lane_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s16))) svint16_t svqrdmulh_lane_s16(svint16_t, 
svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_m))) svint8_t svqrshl_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_m))) svint32_t svqrshl_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_m))) svint64_t svqrshl_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_m))) svint16_t svqrshl_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_x))) svint8_t svqrshl_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_x))) svint32_t svqrshl_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_x))) svint64_t svqrshl_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_x))) svint16_t svqrshl_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_z))) svint8_t svqrshl_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_z))) svint32_t svqrshl_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_z))) svint64_t svqrshl_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_z))) svint16_t svqrshl_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_m))) svuint8_t svqrshl_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_m))) svuint32_t svqrshl_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_m))) svuint64_t 
svqrshl_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_m))) svuint16_t svqrshl_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_x))) svuint8_t svqrshl_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_x))) svuint32_t svqrshl_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_x))) svuint64_t svqrshl_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_x))) svuint16_t svqrshl_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_z))) svuint8_t svqrshl_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_z))) svuint32_t svqrshl_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_z))) svuint64_t svqrshl_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_z))) svuint16_t svqrshl_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_m))) svint8_t svqrshl_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_m))) svint32_t svqrshl_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_m))) svint64_t svqrshl_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_m))) svint16_t svqrshl_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_x))) svint8_t svqrshl_s8_x(svbool_t, svint8_t, svint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_x))) svint32_t svqrshl_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_x))) svint64_t svqrshl_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_x))) svint16_t svqrshl_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_z))) svint8_t svqrshl_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_z))) svint32_t svqrshl_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_z))) svint64_t svqrshl_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_z))) svint16_t svqrshl_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_m))) svuint8_t svqrshl_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_m))) svuint32_t svqrshl_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_m))) svuint64_t svqrshl_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_m))) svuint16_t svqrshl_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_x))) svuint8_t svqrshl_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_x))) svuint32_t svqrshl_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_x))) svuint64_t svqrshl_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_x))) svuint16_t svqrshl_u16_x(svbool_t, svuint16_t, 
svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_z))) svuint8_t svqrshl_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_z))) svuint32_t svqrshl_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_z))) svuint64_t svqrshl_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_z))) svuint16_t svqrshl_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s32))) svint16_t svqrshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s64))) svint32_t svqrshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s16))) svint8_t svqrshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u32))) svuint16_t svqrshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u64))) svuint32_t svqrshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u16))) svuint8_t svqrshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s32))) svint16_t svqrshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s64))) svint32_t svqrshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s16))) svint8_t svqrshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u32))) svuint16_t svqrshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u64))) svuint32_t svqrshrnt_n_u64(svuint32_t, svuint64_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u16))) svuint8_t svqrshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s32))) svuint16_t svqrshrunb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s64))) svuint32_t svqrshrunb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s16))) svuint8_t svqrshrunb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s32))) svuint16_t svqrshrunt_n_s32(svuint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s64))) svuint32_t svqrshrunt_n_s64(svuint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s16))) svuint8_t svqrshrunt_n_s16(svuint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_m))) svint8_t svqshl_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_m))) svint32_t svqshl_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_m))) svint64_t svqshl_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_m))) svint16_t svqshl_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_x))) svint8_t svqshl_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_x))) svint32_t svqshl_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_x))) svint64_t svqshl_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_x))) svint16_t 
svqshl_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_z))) svint8_t svqshl_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_z))) svint32_t svqshl_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_z))) svint64_t svqshl_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_z))) svint16_t svqshl_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_m))) svuint8_t svqshl_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_m))) svuint32_t svqshl_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_m))) svuint64_t svqshl_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_m))) svuint16_t svqshl_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_x))) svuint8_t svqshl_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_x))) svuint32_t svqshl_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_x))) svuint64_t svqshl_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_x))) svuint16_t svqshl_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_z))) svuint8_t svqshl_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_z))) svuint32_t svqshl_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_z))) 
svuint64_t svqshl_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_z))) svuint16_t svqshl_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_m))) svint8_t svqshl_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_m))) svint32_t svqshl_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_m))) svint64_t svqshl_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_m))) svint16_t svqshl_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_x))) svint8_t svqshl_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_x))) svint32_t svqshl_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_x))) svint64_t svqshl_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_x))) svint16_t svqshl_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_z))) svint8_t svqshl_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_z))) svint32_t svqshl_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_z))) svint64_t svqshl_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_z))) svint16_t svqshl_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_m))) svuint8_t svqshl_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_m))) svuint32_t 
svqshl_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_m))) svuint64_t svqshl_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_m))) svuint16_t svqshl_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_x))) svuint8_t svqshl_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_x))) svuint32_t svqshl_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_x))) svuint64_t svqshl_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_x))) svuint16_t svqshl_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_z))) svuint8_t svqshl_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_z))) svuint32_t svqshl_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_z))) svuint64_t svqshl_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_z))) svuint16_t svqshl_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_m))) svuint8_t svqshlu_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_m))) svuint32_t svqshlu_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_m))) svuint64_t svqshlu_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_m))) svuint16_t svqshlu_n_s16_m(svbool_t, svint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_x))) svuint8_t svqshlu_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_x))) svuint32_t svqshlu_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_x))) svuint64_t svqshlu_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_x))) svuint16_t svqshlu_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_z))) svuint8_t svqshlu_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_z))) svuint32_t svqshlu_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_z))) svuint64_t svqshlu_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_z))) svuint16_t svqshlu_n_s16_z(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s32))) svint16_t svqshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s64))) svint32_t svqshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s16))) svint8_t svqshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u32))) svuint16_t svqshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u64))) svuint32_t svqshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u16))) svuint8_t svqshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s32))) svint16_t svqshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s64))) svint32_t svqshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s16))) svint8_t svqshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u32))) svuint16_t svqshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u64))) svuint32_t svqshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u16))) svuint8_t svqshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s32))) svuint16_t svqshrunb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s64))) svuint32_t svqshrunb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s16))) svuint8_t svqshrunb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s32))) svuint16_t svqshrunt_n_s32(svuint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s64))) svuint32_t svqshrunt_n_s64(svuint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s16))) svuint8_t svqshrunt_n_s16(svuint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_m))) svint8_t svqsub_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_m))) svint32_t svqsub_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_m))) svint64_t svqsub_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_m))) svint16_t svqsub_n_s16_m(svbool_t, 
svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_x))) svint8_t svqsub_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_x))) svint32_t svqsub_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_x))) svint64_t svqsub_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_x))) svint16_t svqsub_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_z))) svint8_t svqsub_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_z))) svint32_t svqsub_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_z))) svint64_t svqsub_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_z))) svint16_t svqsub_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_m))) svuint8_t svqsub_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_m))) svuint32_t svqsub_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_m))) svuint64_t svqsub_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_m))) svuint16_t svqsub_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_x))) svuint8_t svqsub_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_x))) svuint32_t svqsub_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_x))) svuint64_t 
svqsub_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_x))) svuint16_t svqsub_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_z))) svuint8_t svqsub_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_z))) svuint32_t svqsub_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_z))) svuint64_t svqsub_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_z))) svuint16_t svqsub_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_m))) svint8_t svqsub_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_m))) svint32_t svqsub_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_m))) svint64_t svqsub_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_m))) svint16_t svqsub_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_x))) svint8_t svqsub_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_x))) svint32_t svqsub_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_x))) svint64_t svqsub_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_x))) svint16_t svqsub_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_z))) svint8_t svqsub_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_z))) svint32_t 
svqsub_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_z))) svint64_t svqsub_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_z))) svint16_t svqsub_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_m))) svuint8_t svqsub_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_m))) svuint32_t svqsub_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_m))) svuint64_t svqsub_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_m))) svuint16_t svqsub_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_x))) svuint8_t svqsub_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_x))) svuint32_t svqsub_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_x))) svuint64_t svqsub_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_x))) svuint16_t svqsub_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_z))) svuint8_t svqsub_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_z))) svuint32_t svqsub_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_z))) svuint64_t svqsub_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_z))) svuint16_t svqsub_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_m))) 
svint8_t svqsubr_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_m))) svint32_t svqsubr_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_m))) svint64_t svqsubr_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_m))) svint16_t svqsubr_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_x))) svint8_t svqsubr_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_x))) svint32_t svqsubr_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_x))) svint64_t svqsubr_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_x))) svint16_t svqsubr_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_z))) svint8_t svqsubr_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_z))) svint32_t svqsubr_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_z))) svint64_t svqsubr_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_z))) svint16_t svqsubr_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_m))) svuint8_t svqsubr_n_u8_m(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_m))) svuint32_t svqsubr_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_m))) svuint64_t svqsubr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_m))) svuint16_t svqsubr_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_x))) svuint8_t svqsubr_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_x))) svuint32_t svqsubr_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_x))) svuint64_t svqsubr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_x))) svuint16_t svqsubr_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_z))) svuint8_t svqsubr_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_z))) svuint32_t svqsubr_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_z))) svuint64_t svqsubr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_z))) svuint16_t svqsubr_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_m))) svint8_t svqsubr_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_m))) svint32_t svqsubr_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_m))) svint64_t svqsubr_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_m))) svint16_t svqsubr_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_x))) svint8_t svqsubr_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_x))) svint32_t 
svqsubr_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_x))) svint64_t svqsubr_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_x))) svint16_t svqsubr_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_z))) svint8_t svqsubr_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_z))) svint32_t svqsubr_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_z))) svint64_t svqsubr_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_z))) svint16_t svqsubr_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_m))) svuint8_t svqsubr_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_m))) svuint32_t svqsubr_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_m))) svuint64_t svqsubr_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_m))) svuint16_t svqsubr_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_x))) svuint8_t svqsubr_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_x))) svuint32_t svqsubr_u32_x(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_x))) svuint64_t svqsubr_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_x))) svuint16_t svqsubr_u16_x(svbool_t, svuint16_t, svuint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_z))) svuint8_t svqsubr_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_z))) svuint32_t svqsubr_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_z))) svuint64_t svqsubr_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_z))) svuint16_t svqsubr_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s32))) svint16_t svqxtnb_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s64))) svint32_t svqxtnb_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s16))) svint8_t svqxtnb_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u32))) svuint16_t svqxtnb_u32(svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u64))) svuint32_t svqxtnb_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u16))) svuint8_t svqxtnb_u16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s32))) svint16_t svqxtnt_s32(svint16_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s64))) svint32_t svqxtnt_s64(svint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s16))) svint8_t svqxtnt_s16(svint8_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u32))) svuint16_t svqxtnt_u32(svuint16_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u64))) svuint32_t svqxtnt_u64(svuint32_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u16))) svuint8_t svqxtnt_u16(svuint8_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s32))) 
svuint16_t svqxtunb_s32(svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s64))) svuint32_t svqxtunb_s64(svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s16))) svuint8_t svqxtunb_s16(svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s32))) svuint16_t svqxtunt_s32(svuint16_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s64))) svuint32_t svqxtunt_s64(svuint32_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s16))) svuint8_t svqxtunt_s16(svuint8_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u32))) svuint16_t svraddhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u64))) svuint32_t svraddhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u16))) svuint8_t svraddhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s32))) svint16_t svraddhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s64))) svint32_t svraddhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s16))) svint8_t svraddhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u32))) svuint16_t svraddhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u64))) svuint32_t svraddhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u16))) svuint8_t svraddhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s32))) svint16_t svraddhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s64))) svint32_t 
svraddhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s16))) svint8_t svraddhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u32))) svuint16_t svraddhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u64))) svuint32_t svraddhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u16))) svuint8_t svraddhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s32))) svint16_t svraddhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s64))) svint32_t svraddhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s16))) svint8_t svraddhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u32))) svuint16_t svraddhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u64))) svuint32_t svraddhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u16))) svuint8_t svraddhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s32))) svint16_t svraddhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s64))) svint32_t svraddhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s16))) svint8_t svraddhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_m))) svuint32_t svrecpe_u32_m(svuint32_t, svbool_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_x))) svuint32_t svrecpe_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_z))) svuint32_t svrecpe_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_m))) svint8_t svrhadd_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_m))) svint32_t svrhadd_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_m))) svint64_t svrhadd_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_m))) svint16_t svrhadd_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_x))) svint8_t svrhadd_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_x))) svint32_t svrhadd_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_x))) svint64_t svrhadd_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_x))) svint16_t svrhadd_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_z))) svint8_t svrhadd_n_s8_z(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_z))) svint32_t svrhadd_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_z))) svint64_t svrhadd_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_z))) svint16_t svrhadd_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_m))) svuint8_t svrhadd_n_u8_m(svbool_t, svuint8_t, uint8_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_m))) svuint32_t svrhadd_n_u32_m(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_m))) svuint64_t svrhadd_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_m))) svuint16_t svrhadd_n_u16_m(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_x))) svuint8_t svrhadd_n_u8_x(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_x))) svuint32_t svrhadd_n_u32_x(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_x))) svuint64_t svrhadd_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_x))) svuint16_t svrhadd_n_u16_x(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_z))) svuint8_t svrhadd_n_u8_z(svbool_t, svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_z))) svuint32_t svrhadd_n_u32_z(svbool_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_z))) svuint64_t svrhadd_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_z))) svuint16_t svrhadd_n_u16_z(svbool_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_m))) svint8_t svrhadd_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_m))) svint32_t svrhadd_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_m))) svint64_t svrhadd_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_m))) 
svint16_t svrhadd_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_x))) svint8_t svrhadd_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_x))) svint32_t svrhadd_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_x))) svint64_t svrhadd_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_x))) svint16_t svrhadd_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_z))) svint8_t svrhadd_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_z))) svint32_t svrhadd_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_z))) svint64_t svrhadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_z))) svint16_t svrhadd_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_m))) svuint8_t svrhadd_u8_m(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_m))) svuint32_t svrhadd_u32_m(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_m))) svuint64_t svrhadd_u64_m(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_m))) svuint16_t svrhadd_u16_m(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_x))) svuint8_t svrhadd_u8_x(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_x))) svuint32_t svrhadd_u32_x(svbool_t, svuint32_t, svuint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_x))) svuint64_t svrhadd_u64_x(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_x))) svuint16_t svrhadd_u16_x(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_z))) svuint8_t svrhadd_u8_z(svbool_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_z))) svuint32_t svrhadd_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_z))) svuint64_t svrhadd_u64_z(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_z))) svuint16_t svrhadd_u16_z(svbool_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_m))) svint8_t svrshl_n_s8_m(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_m))) svint32_t svrshl_n_s32_m(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_m))) svint64_t svrshl_n_s64_m(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_m))) svint16_t svrshl_n_s16_m(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_x))) svint8_t svrshl_n_s8_x(svbool_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_x))) svint32_t svrshl_n_s32_x(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_x))) svint64_t svrshl_n_s64_x(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_x))) svint16_t svrshl_n_s16_x(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_z))) svint8_t svrshl_n_s8_z(svbool_t, svint8_t, 
int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_z))) svint32_t svrshl_n_s32_z(svbool_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_z))) svint64_t svrshl_n_s64_z(svbool_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_z))) svint16_t svrshl_n_s16_z(svbool_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_m))) svuint8_t svrshl_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_m))) svuint32_t svrshl_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_m))) svuint64_t svrshl_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_m))) svuint16_t svrshl_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_x))) svuint8_t svrshl_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_x))) svuint32_t svrshl_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_x))) svuint64_t svrshl_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_x))) svuint16_t svrshl_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_z))) svuint8_t svrshl_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_z))) svuint32_t svrshl_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_z))) svuint64_t svrshl_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_z))) svuint16_t 
svrshl_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_m))) svint8_t svrshl_s8_m(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_m))) svint32_t svrshl_s32_m(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_m))) svint64_t svrshl_s64_m(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_m))) svint16_t svrshl_s16_m(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x))) svint8_t svrshl_s8_x(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x))) svint32_t svrshl_s32_x(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x))) svint64_t svrshl_s64_x(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x))) svint16_t svrshl_s16_x(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_z))) svint8_t svrshl_s8_z(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_z))) svint32_t svrshl_s32_z(svbool_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_z))) svint64_t svrshl_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_z))) svint16_t svrshl_s16_z(svbool_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_m))) svuint8_t svrshl_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_m))) svuint32_t svrshl_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_m))) svuint64_t svrshl_u64_m(svbool_t, 
svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_m))) svuint16_t svrshl_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x))) svuint8_t svrshl_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x))) svuint32_t svrshl_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x))) svuint64_t svrshl_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x))) svuint16_t svrshl_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_z))) svuint8_t svrshl_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_z))) svuint32_t svrshl_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_z))) svuint64_t svrshl_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_z))) svuint16_t svrshl_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_m))) svint8_t svrshr_n_s8_m(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_m))) svint32_t svrshr_n_s32_m(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_m))) svint64_t svrshr_n_s64_m(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_m))) svint16_t svrshr_n_s16_m(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_m))) svuint8_t svrshr_n_u8_m(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_m))) svuint32_t 
svrshr_n_u32_m(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_m))) svuint64_t svrshr_n_u64_m(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_m))) svuint16_t svrshr_n_u16_m(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_x))) svint8_t svrshr_n_s8_x(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_x))) svint32_t svrshr_n_s32_x(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_x))) svint64_t svrshr_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_x))) svint16_t svrshr_n_s16_x(svbool_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_x))) svuint8_t svrshr_n_u8_x(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_x))) svuint32_t svrshr_n_u32_x(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_x))) svuint64_t svrshr_n_u64_x(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_x))) svuint16_t svrshr_n_u16_x(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_z))) svint8_t svrshr_n_s8_z(svbool_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_z))) svint32_t svrshr_n_s32_z(svbool_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_z))) svint64_t svrshr_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_z))) svint16_t svrshr_n_s16_z(svbool_t, svint16_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_z))) svuint8_t svrshr_n_u8_z(svbool_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_z))) svuint32_t svrshr_n_u32_z(svbool_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_z))) svuint64_t svrshr_n_u64_z(svbool_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_z))) svuint16_t svrshr_n_u16_z(svbool_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u32))) svuint16_t svrshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u64))) svuint32_t svrshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u16))) svuint8_t svrshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s32))) svint16_t svrshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s64))) svint32_t svrshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s16))) svint8_t svrshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u32))) svuint16_t svrshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u64))) svuint32_t svrshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u16))) svuint8_t svrshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s32))) svint16_t svrshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s64))) svint32_t svrshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s16))) svint8_t svrshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_m))) svuint32_t svrsqrte_u32_m(svuint32_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_x))) svuint32_t svrsqrte_u32_x(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_z))) svuint32_t svrsqrte_u32_z(svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s8))) svint8_t svrsra_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s32))) svint32_t svrsra_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s64))) svint64_t svrsra_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s16))) svint16_t svrsra_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u8))) svuint8_t svrsra_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u32))) svuint32_t svrsra_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u64))) svuint64_t svrsra_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u16))) svuint16_t svrsra_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u32))) svuint16_t svrsubhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u64))) svuint32_t svrsubhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u16))) svuint8_t svrsubhnb_n_u16(svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s32))) svint16_t svrsubhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s64))) svint32_t svrsubhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s16))) svint8_t svrsubhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u32))) svuint16_t svrsubhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u64))) svuint32_t svrsubhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u16))) svuint8_t svrsubhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s32))) svint16_t svrsubhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s64))) svint32_t svrsubhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s16))) svint8_t svrsubhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u32))) svuint16_t svrsubhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u64))) svuint32_t svrsubhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u16))) svuint8_t svrsubhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s32))) svint16_t svrsubhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s64))) svint32_t svrsubhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s16))) svint8_t svrsubhnt_n_s16(svint8_t, svint16_t, int16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u32))) svuint16_t svrsubhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u64))) svuint32_t svrsubhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u16))) svuint8_t svrsubhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s32))) svint16_t svrsubhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s64))) svint32_t svrsubhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s16))) svint8_t svrsubhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u32))) svuint32_t svsbclb_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u64))) svuint64_t svsbclb_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u32))) svuint32_t svsbclb_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u64))) svuint64_t svsbclb_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u32))) svuint32_t svsbclt_n_u32(svuint32_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u64))) svuint64_t svsbclt_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u32))) svuint32_t svsbclt_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u64))) svuint64_t svsbclt_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s32))) svint32_t 
svshllb_n_s32(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s64))) svint64_t svshllb_n_s64(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s16))) svint16_t svshllb_n_s16(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u32))) svuint32_t svshllb_n_u32(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u64))) svuint64_t svshllb_n_u64(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u16))) svuint16_t svshllb_n_u16(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s32))) svint32_t svshllt_n_s32(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s64))) svint64_t svshllt_n_s64(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s16))) svint16_t svshllt_n_s16(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u32))) svuint32_t svshllt_n_u32(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u64))) svuint64_t svshllt_n_u64(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u16))) svuint16_t svshllt_n_u16(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u32))) svuint16_t svshrnb_n_u32(svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u64))) svuint32_t svshrnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u16))) svuint8_t svshrnb_n_u16(svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s32))) svint16_t svshrnb_n_s32(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s64))) svint32_t 
svshrnb_n_s64(svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s16))) svint8_t svshrnb_n_s16(svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u32))) svuint16_t svshrnt_n_u32(svuint16_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u64))) svuint32_t svshrnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u16))) svuint8_t svshrnt_n_u16(svuint8_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s32))) svint16_t svshrnt_n_s32(svint16_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s64))) svint32_t svshrnt_n_s64(svint32_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s16))) svint8_t svshrnt_n_s16(svint8_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u8))) svuint8_t svsli_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u32))) svuint32_t svsli_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u64))) svuint64_t svsli_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u16))) svuint16_t svsli_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s8))) svint8_t svsli_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s32))) svint32_t svsli_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s64))) svint64_t svsli_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s16))) svint16_t svsli_n_s16(svint16_t, svint16_t, 
uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_m))) svuint8_t svsqadd_n_u8_m(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_m))) svuint32_t svsqadd_n_u32_m(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_m))) svuint64_t svsqadd_n_u64_m(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_m))) svuint16_t svsqadd_n_u16_m(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_x))) svuint8_t svsqadd_n_u8_x(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_x))) svuint32_t svsqadd_n_u32_x(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_x))) svuint64_t svsqadd_n_u64_x(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_x))) svuint16_t svsqadd_n_u16_x(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_z))) svuint8_t svsqadd_n_u8_z(svbool_t, svuint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_z))) svuint32_t svsqadd_n_u32_z(svbool_t, svuint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_z))) svuint64_t svsqadd_n_u64_z(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_z))) svuint16_t svsqadd_n_u16_z(svbool_t, svuint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_m))) svuint8_t svsqadd_u8_m(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_m))) svuint32_t svsqadd_u32_m(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_m))) 
svuint64_t svsqadd_u64_m(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_m))) svuint16_t svsqadd_u16_m(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_x))) svuint8_t svsqadd_u8_x(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_x))) svuint32_t svsqadd_u32_x(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_x))) svuint64_t svsqadd_u64_x(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_x))) svuint16_t svsqadd_u16_x(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_z))) svuint8_t svsqadd_u8_z(svbool_t, svuint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_z))) svuint32_t svsqadd_u32_z(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_z))) svuint64_t svsqadd_u64_z(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_z))) svuint16_t svsqadd_u16_z(svbool_t, svuint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s8))) svint8_t svsra_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s32))) svint32_t svsra_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s64))) svint64_t svsra_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s16))) svint16_t svsra_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u8))) svuint8_t svsra_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u32))) 
svuint32_t svsra_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u64))) svuint64_t svsra_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u16))) svuint16_t svsra_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u8))) svuint8_t svsri_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u32))) svuint32_t svsri_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u64))) svuint64_t svsri_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u16))) svuint16_t svsri_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s8))) svint8_t svsri_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s32))) svint32_t svsri_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s64))) svint64_t svsri_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s16))) svint16_t svsri_n_s16(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) void svstnt1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) void svstnt1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) void svstnt1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) void svstnt1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) void svstnt1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) void svstnt1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) void svstnt1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) void svstnt1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) void svstnt1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) void svstnt1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) void svstnt1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) void svstnt1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) void svstnt1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) void svstnt1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) void svstnt1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) void svstnt1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) void svstnt1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) void svstnt1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) void svstnt1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) void svstnt1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) void svstnt1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) void svstnt1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) void svstnt1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) void svstnt1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) void svstnt1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) void svstnt1_scatter_u32offset_f32(svbool_t, float32_t *, 
svuint32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) void svstnt1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) void svstnt1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) void svstnt1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) void svstnt1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) void svstnt1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) void svstnt1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) void svstnt1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) void svstnt1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) void svstnt1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) void svstnt1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) void svstnt1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) void svstnt1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) void svstnt1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) void svstnt1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) void svstnt1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) void svstnt1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) void svstnt1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) void svstnt1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) void svstnt1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) void svstnt1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) void svstnt1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) void svstnt1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) void 
svstnt1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) void svstnt1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) void svstnt1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) void svstnt1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) void svstnt1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) void svstnt1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) void svstnt1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) void svstnt1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) void svstnt1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) void svstnt1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) void svstnt1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) void svstnt1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) void svstnt1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) void svstnt1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) void svstnt1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) void svstnt1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) void svstnt1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) void svstnt1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) void svstnt1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) void svstnt1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) void svstnt1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) void svstnt1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) void svstnt1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) void svstnt1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) void svstnt1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) void svstnt1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) void svstnt1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) void svstnt1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) void svstnt1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) void svstnt1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) void svstnt1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) void svstnt1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) void svstnt1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) void svstnt1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) 
void svstnt1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u32))) svuint16_t svsubhnb_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u64))) svuint32_t svsubhnb_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u16))) svuint8_t svsubhnb_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s32))) svint16_t svsubhnb_n_s32(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s64))) svint32_t svsubhnb_n_s64(svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s16))) svint8_t svsubhnb_n_s16(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u32))) svuint16_t svsubhnb_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u64))) svuint32_t svsubhnb_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u16))) svuint8_t svsubhnb_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s32))) svint16_t svsubhnb_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s64))) svint32_t svsubhnb_s64(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s16))) svint8_t svsubhnb_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u32))) svuint16_t svsubhnt_n_u32(svuint16_t, svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u64))) svuint32_t svsubhnt_n_u64(svuint32_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u16))) svuint8_t svsubhnt_n_u16(svuint8_t, svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s32))) svint16_t svsubhnt_n_s32(svint16_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s64))) svint32_t svsubhnt_n_s64(svint32_t, svint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s16))) svint8_t svsubhnt_n_s16(svint8_t, svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u32))) svuint16_t svsubhnt_u32(svuint16_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u64))) svuint32_t svsubhnt_u64(svuint32_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u16))) svuint8_t svsubhnt_u16(svuint8_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s32))) svint16_t svsubhnt_s32(svint16_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s64))) svint32_t svsubhnt_s64(svint32_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s16))) svint8_t svsubhnt_s16(svint8_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s32))) svint32_t svsublb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s64))) svint64_t svsublb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s16))) svint16_t svsublb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u32))) svuint32_t svsublb_n_u32(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u64))) svuint64_t svsublb_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u16))) svuint16_t svsublb_n_u16(svuint8_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s32))) svint32_t svsublb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s64))) svint64_t svsublb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s16))) svint16_t svsublb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u32))) svuint32_t svsublb_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u64))) svuint64_t svsublb_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u16))) svuint16_t svsublb_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s32))) svint32_t svsublbt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s64))) svint64_t svsublbt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s16))) svint16_t svsublbt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s32))) svint32_t svsublbt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s64))) svint64_t svsublbt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s16))) svint16_t svsublbt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s32))) svint32_t svsublt_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s64))) svint64_t svsublt_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s16))) svint16_t svsublt_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u32))) svuint32_t svsublt_n_u32(svuint16_t, uint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u64))) svuint64_t svsublt_n_u64(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u16))) svuint16_t svsublt_n_u16(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s32))) svint32_t svsublt_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s64))) svint64_t svsublt_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s16))) svint16_t svsublt_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u32))) svuint32_t svsublt_u32(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u64))) svuint64_t svsublt_u64(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u16))) svuint16_t svsublt_u16(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s32))) svint32_t svsubltb_n_s32(svint16_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s64))) svint64_t svsubltb_n_s64(svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s16))) svint16_t svsubltb_n_s16(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s32))) svint32_t svsubltb_s32(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s64))) svint64_t svsubltb_s64(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s16))) svint16_t svsubltb_s16(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s32))) svint32_t svsubwb_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s64))) svint64_t svsubwb_n_s64(svint64_t, int32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s16))) svint16_t svsubwb_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u32))) svuint32_t svsubwb_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u64))) svuint64_t svsubwb_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u16))) svuint16_t svsubwb_n_u16(svuint16_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s32))) svint32_t svsubwb_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s64))) svint64_t svsubwb_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s16))) svint16_t svsubwb_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u32))) svuint32_t svsubwb_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u64))) svuint64_t svsubwb_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u16))) svuint16_t svsubwb_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s32))) svint32_t svsubwt_n_s32(svint32_t, int16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s64))) svint64_t svsubwt_n_s64(svint64_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s16))) svint16_t svsubwt_n_s16(svint16_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u32))) svuint32_t svsubwt_n_u32(svuint32_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u64))) svuint64_t svsubwt_n_u64(svuint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u16))) svuint16_t svsubwt_n_u16(svuint16_t, uint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s32))) svint32_t svsubwt_s32(svint32_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s64))) svint64_t svsubwt_s64(svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s16))) svint16_t svsubwt_s16(svint16_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u32))) svuint32_t svsubwt_u32(svuint32_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u64))) svuint64_t svsubwt_u64(svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u16))) svuint16_t svsubwt_u16(svuint16_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u8))) svuint8_t svtbl2_u8(svuint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u32))) svuint32_t svtbl2_u32(svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u64))) svuint64_t svtbl2_u64(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u16))) svuint16_t svtbl2_u16(svuint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s8))) svint8_t svtbl2_s8(svint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f64))) svfloat64_t svtbl2_f64(svfloat64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f32))) svfloat32_t svtbl2_f32(svfloat32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f16))) svfloat16_t svtbl2_f16(svfloat16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s32))) svint32_t svtbl2_s32(svint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s64))) svint64_t svtbl2_s64(svint64x2_t, svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s16))) svint16_t svtbl2_s16(svint16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u8))) svuint8_t svtbx_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u32))) svuint32_t svtbx_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u64))) svuint64_t svtbx_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u16))) svuint16_t svtbx_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s8))) svint8_t svtbx_s8(svint8_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f64))) svfloat64_t svtbx_f64(svfloat64_t, svfloat64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f32))) svfloat32_t svtbx_f32(svfloat32_t, svfloat32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f16))) svfloat16_t svtbx_f16(svfloat16_t, svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s32))) svint32_t svtbx_s32(svint32_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s64))) svint64_t svtbx_s64(svint64_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s16))) svint16_t svtbx_s16(svint16_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_m))) svint8_t svuqadd_n_s8_m(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_m))) svint32_t svuqadd_n_s32_m(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_m))) svint64_t svuqadd_n_s64_m(svbool_t, svint64_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_m))) svint16_t svuqadd_n_s16_m(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_x))) svint8_t svuqadd_n_s8_x(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_x))) svint32_t svuqadd_n_s32_x(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_x))) svint64_t svuqadd_n_s64_x(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_x))) svint16_t svuqadd_n_s16_x(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_z))) svint8_t svuqadd_n_s8_z(svbool_t, svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_z))) svint32_t svuqadd_n_s32_z(svbool_t, svint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_z))) svint64_t svuqadd_n_s64_z(svbool_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_z))) svint16_t svuqadd_n_s16_z(svbool_t, svint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_m))) svint8_t svuqadd_s8_m(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_m))) svint32_t svuqadd_s32_m(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_m))) svint64_t svuqadd_s64_m(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_m))) svint16_t svuqadd_s16_m(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_x))) svint8_t svuqadd_s8_x(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_x))) svint32_t svuqadd_s32_x(svbool_t, 
svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_x))) svint64_t svuqadd_s64_x(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_x))) svint16_t svuqadd_s16_x(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_z))) svint8_t svuqadd_s8_z(svbool_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_z))) svint32_t svuqadd_s32_z(svbool_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_z))) svint64_t svuqadd_s64_z(svbool_t, svint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_z))) svint16_t svuqadd_s16_z(svbool_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s32))) svbool_t svwhilege_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s32))) svbool_t svwhilege_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s32))) svbool_t svwhilege_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s32))) svbool_t svwhilege_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64))) svbool_t svwhilege_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64))) svbool_t svwhilege_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64))) svbool_t svwhilege_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64))) svbool_t svwhilege_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u32))) svbool_t svwhilege_b8_u32(uint32_t, uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u32))) svbool_t svwhilege_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u32))) svbool_t svwhilege_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u32))) svbool_t svwhilege_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64))) svbool_t svwhilege_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64))) svbool_t svwhilege_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64))) svbool_t svwhilege_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64))) svbool_t svwhilege_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s32))) svbool_t svwhilegt_b8_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s32))) svbool_t svwhilegt_b32_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s32))) svbool_t svwhilegt_b64_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s32))) svbool_t svwhilegt_b16_s32(int32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64))) svbool_t svwhilegt_b8_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64))) svbool_t svwhilegt_b32_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64))) svbool_t svwhilegt_b64_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64))) svbool_t svwhilegt_b16_s64(int64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u32))) svbool_t 
svwhilegt_b8_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u32))) svbool_t svwhilegt_b32_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u32))) svbool_t svwhilegt_b64_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u32))) svbool_t svwhilegt_b16_u32(uint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64))) svbool_t svwhilegt_b8_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64))) svbool_t svwhilegt_b32_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64))) svbool_t svwhilegt_b64_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64))) svbool_t svwhilegt_b16_u64(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u8))) svbool_t svwhilerw_u8(uint8_t const *, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s8))) svbool_t svwhilerw_s8(int8_t const *, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u64))) svbool_t svwhilerw_u64(uint64_t const *, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f64))) svbool_t svwhilerw_f64(float64_t const *, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s64))) svbool_t svwhilerw_s64(int64_t const *, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u16))) svbool_t svwhilerw_u16(uint16_t const *, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f16))) svbool_t svwhilerw_f16(float16_t const *, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s16))) svbool_t svwhilerw_s16(int16_t 
const *, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u32))) svbool_t svwhilerw_u32(uint32_t const *, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f32))) svbool_t svwhilerw_f32(float32_t const *, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s32))) svbool_t svwhilerw_s32(int32_t const *, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u8))) svbool_t svwhilewr_u8(uint8_t const *, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s8))) svbool_t svwhilewr_s8(int8_t const *, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u64))) svbool_t svwhilewr_u64(uint64_t const *, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f64))) svbool_t svwhilewr_f64(float64_t const *, float64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s64))) svbool_t svwhilewr_s64(int64_t const *, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u16))) svbool_t svwhilewr_u16(uint16_t const *, uint16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f16))) svbool_t svwhilewr_f16(float16_t const *, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s16))) svbool_t svwhilewr_s16(int16_t const *, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u32))) svbool_t svwhilewr_u32(uint32_t const *, uint32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f32))) svbool_t svwhilewr_f32(float32_t const *, float32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s32))) svbool_t svwhilewr_s32(int32_t const *, int32_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u8))) svuint8_t svxar_n_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u32))) svuint32_t svxar_n_u32(svuint32_t, svuint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u64))) svuint64_t svxar_n_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u16))) svuint16_t svxar_n_u16(svuint16_t, svuint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s8))) svint8_t svxar_n_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s32))) svint32_t svxar_n_s32(svint32_t, svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s64))) svint64_t svxar_n_s64(svint64_t, svint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s16))) svint16_t svxar_n_s16(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8))) svint8_t svaba(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32))) svint32_t svaba(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s64))) svint64_t svaba(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s16))) svint16_t svaba(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u8))) svuint8_t svaba(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u32))) svuint32_t svaba(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u64))) svuint64_t svaba(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_u16))) svuint16_t 
svaba(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s8))) svint8_t svaba(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s32))) svint32_t svaba(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s64))) svint64_t svaba(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_s16))) svint16_t svaba(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u8))) svuint8_t svaba(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u32))) svuint32_t svaba(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u64))) svuint64_t svaba(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_u16))) svuint16_t svaba(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s32))) svint32_t svabalb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s64))) svint64_t svabalb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_s16))) svint16_t svabalb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u32))) svuint32_t svabalb(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u64))) svuint64_t svabalb(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_n_u16))) svuint16_t svabalb(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s32))) svint32_t svabalb(svint32_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s64))) svint64_t svabalb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_s16))) svint16_t svabalb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u32))) svuint32_t svabalb(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u64))) svuint64_t svabalb(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalb_u16))) svuint16_t svabalb(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s32))) svint32_t svabalt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s64))) svint64_t svabalt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_s16))) svint16_t svabalt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u32))) svuint32_t svabalt(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u64))) svuint64_t svabalt(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_n_u16))) svuint16_t svabalt(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s32))) svint32_t svabalt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s64))) svint64_t svabalt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_s16))) svint16_t svabalt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u32))) svuint32_t svabalt(svuint32_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u64))) svuint64_t svabalt(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabalt_u16))) svuint16_t svabalt(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s32))) svint32_t svabdlb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s64))) svint64_t svabdlb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_s16))) svint16_t svabdlb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u32))) svuint32_t svabdlb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u64))) svuint64_t svabdlb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_n_u16))) svuint16_t svabdlb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s32))) svint32_t svabdlb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s64))) svint64_t svabdlb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_s16))) svint16_t svabdlb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u32))) svuint32_t svabdlb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u64))) svuint64_t svabdlb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlb_u16))) svuint16_t svabdlb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s32))) svint32_t svabdlt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s64))) svint64_t svabdlt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_s16))) svint16_t 
svabdlt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u32))) svuint32_t svabdlt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u64))) svuint64_t svabdlt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_n_u16))) svuint16_t svabdlt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s32))) svint32_t svabdlt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s64))) svint64_t svabdlt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_s16))) svint16_t svabdlt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u32))) svuint32_t svabdlt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u64))) svuint64_t svabdlt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabdlt_u16))) svuint16_t svabdlt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_m))) svint32_t svadalp_m(svbool_t, svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_m))) svint64_t svadalp_m(svbool_t, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_m))) svint16_t svadalp_m(svbool_t, svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_x))) svint32_t svadalp_x(svbool_t, svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_x))) svint64_t svadalp_x(svbool_t, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_x))) svint16_t svadalp_x(svbool_t, svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s32_z))) svint32_t svadalp_z(svbool_t, svint32_t, 
svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s64_z))) svint64_t svadalp_z(svbool_t, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_s16_z))) svint16_t svadalp_z(svbool_t, svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_m))) svuint32_t svadalp_m(svbool_t, svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_m))) svuint64_t svadalp_m(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_m))) svuint16_t svadalp_m(svbool_t, svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_x))) svuint32_t svadalp_x(svbool_t, svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_x))) svuint64_t svadalp_x(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_x))) svuint16_t svadalp_x(svbool_t, svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u32_z))) svuint32_t svadalp_z(svbool_t, svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u64_z))) svuint64_t svadalp_z(svbool_t, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadalp_u16_z))) svuint16_t svadalp_z(svbool_t, svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u32))) svuint32_t svadclb(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_n_u64))) svuint64_t svadclb(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u32))) svuint32_t svadclb(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclb_u64))) svuint64_t svadclb(svuint64_t, svuint64_t, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u32))) svuint32_t svadclt(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_n_u64))) svuint64_t svadclt(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u32))) svuint32_t svadclt(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadclt_u64))) svuint64_t svadclt(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u32))) svuint16_t svaddhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u64))) svuint32_t svaddhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_u16))) svuint8_t svaddhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s32))) svint16_t svaddhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s64))) svint32_t svaddhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_n_s16))) svint8_t svaddhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u32))) svuint16_t svaddhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u64))) svuint32_t svaddhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_u16))) svuint8_t svaddhnb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s32))) svint16_t svaddhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s64))) svint32_t svaddhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnb_s16))) svint8_t svaddhnb(svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u32))) svuint16_t svaddhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u64))) svuint32_t svaddhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_u16))) svuint8_t svaddhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s32))) svint16_t svaddhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s64))) svint32_t svaddhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_n_s16))) svint8_t svaddhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u32))) svuint16_t svaddhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u64))) svuint32_t svaddhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_u16))) svuint8_t svaddhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s32))) svint16_t svaddhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s64))) svint32_t svaddhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddhnt_s16))) svint8_t svaddhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s32))) svint32_t svaddlb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s64))) svint64_t svaddlb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_s16))) svint16_t svaddlb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u32))) 
svuint32_t svaddlb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u64))) svuint64_t svaddlb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_n_u16))) svuint16_t svaddlb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s32))) svint32_t svaddlb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s64))) svint64_t svaddlb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_s16))) svint16_t svaddlb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u32))) svuint32_t svaddlb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u64))) svuint64_t svaddlb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlb_u16))) svuint16_t svaddlb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s32))) svint32_t svaddlbt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s64))) svint64_t svaddlbt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_n_s16))) svint16_t svaddlbt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s32))) svint32_t svaddlbt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s64))) svint64_t svaddlbt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlbt_s16))) svint16_t svaddlbt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s32))) svint32_t svaddlt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s64))) svint64_t svaddlt(svint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_s16))) svint16_t svaddlt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u32))) svuint32_t svaddlt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u64))) svuint64_t svaddlt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_n_u16))) svuint16_t svaddlt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s32))) svint32_t svaddlt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s64))) svint64_t svaddlt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_s16))) svint16_t svaddlt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u32))) svuint32_t svaddlt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u64))) svuint64_t svaddlt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddlt_u16))) svuint16_t svaddlt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_m))) svfloat64_t svaddp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_m))) svfloat32_t svaddp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_m))) svfloat16_t svaddp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f64_x))) svfloat64_t svaddp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f32_x))) svfloat32_t svaddp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_f16_x))) svfloat16_t svaddp_x(svbool_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_m))) svuint8_t svaddp_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_m))) svuint32_t svaddp_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_m))) svuint64_t svaddp_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_m))) svuint16_t svaddp_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_m))) svint8_t svaddp_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_m))) svint32_t svaddp_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_m))) svint64_t svaddp_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_m))) svint16_t svaddp_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u8_x))) svuint8_t svaddp_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u32_x))) svuint32_t svaddp_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u64_x))) svuint64_t svaddp_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_u16_x))) svuint16_t svaddp_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s8_x))) svint8_t svaddp_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s32_x))) svint32_t svaddp_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s64_x))) svint64_t svaddp_x(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddp_s16_x))) svint16_t svaddp_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s32))) svint32_t svaddwb(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s64))) svint64_t svaddwb(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_s16))) svint16_t svaddwb(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u32))) svuint32_t svaddwb(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u64))) svuint64_t svaddwb(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_n_u16))) svuint16_t svaddwb(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s32))) svint32_t svaddwb(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s64))) svint64_t svaddwb(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_s16))) svint16_t svaddwb(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u32))) svuint32_t svaddwb(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u64))) svuint64_t svaddwb(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwb_u16))) svuint16_t svaddwb(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s32))) svint32_t svaddwt(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s64))) svint64_t svaddwt(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_s16))) svint16_t svaddwt(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u32))) svuint32_t svaddwt(svuint32_t, 
uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u64))) svuint64_t svaddwt(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_n_u16))) svuint16_t svaddwt(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s32))) svint32_t svaddwt(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s64))) svint64_t svaddwt(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_s16))) svint16_t svaddwt(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u32))) svuint32_t svaddwt(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u64))) svuint64_t svaddwt(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddwt_u16))) svuint16_t svaddwt(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u8))) svuint8_t svbcax(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u32))) svuint32_t svbcax(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u64))) svuint64_t svbcax(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_u16))) svuint16_t svbcax(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s8))) svint8_t svbcax(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s32))) svint32_t svbcax(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s64))) svint64_t svbcax(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_n_s16))) svint16_t svbcax(svint16_t, svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u8))) svuint8_t svbcax(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u32))) svuint32_t svbcax(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u64))) svuint64_t svbcax(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_u16))) svuint16_t svbcax(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s8))) svint8_t svbcax(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s32))) svint32_t svbcax(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s64))) svint64_t svbcax(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbcax_s16))) svint16_t svbcax(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u8))) svuint8_t svbsl1n(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u32))) svuint32_t svbsl1n(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u64))) svuint64_t svbsl1n(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_u16))) svuint16_t svbsl1n(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s8))) svint8_t svbsl1n(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s32))) svint32_t svbsl1n(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s64))) svint64_t svbsl1n(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_n_s16))) svint16_t 
svbsl1n(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u8))) svuint8_t svbsl1n(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u32))) svuint32_t svbsl1n(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u64))) svuint64_t svbsl1n(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_u16))) svuint16_t svbsl1n(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s8))) svint8_t svbsl1n(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s32))) svint32_t svbsl1n(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s64))) svint64_t svbsl1n(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl1n_s16))) svint16_t svbsl1n(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u8))) svuint8_t svbsl2n(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u32))) svuint32_t svbsl2n(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u64))) svuint64_t svbsl2n(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_u16))) svuint16_t svbsl2n(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s8))) svint8_t svbsl2n(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s32))) svint32_t svbsl2n(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s64))) svint64_t svbsl2n(svint64_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_n_s16))) svint16_t svbsl2n(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u8))) svuint8_t svbsl2n(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u32))) svuint32_t svbsl2n(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u64))) svuint64_t svbsl2n(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_u16))) svuint16_t svbsl2n(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s8))) svint8_t svbsl2n(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s32))) svint32_t svbsl2n(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s64))) svint64_t svbsl2n(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl2n_s16))) svint16_t svbsl2n(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u8))) svuint8_t svbsl(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u32))) svuint32_t svbsl(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u64))) svuint64_t svbsl(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_u16))) svuint16_t svbsl(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s8))) svint8_t svbsl(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s32))) svint32_t svbsl(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s64))) svint64_t 
svbsl(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_n_s16))) svint16_t svbsl(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u8))) svuint8_t svbsl(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u32))) svuint32_t svbsl(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u64))) svuint64_t svbsl(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_u16))) svuint16_t svbsl(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s8))) svint8_t svbsl(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s32))) svint32_t svbsl(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s64))) svint64_t svbsl(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbsl_s16))) svint16_t svbsl(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u8))) svuint8_t svcadd(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u32))) svuint32_t svcadd(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u64))) svuint64_t svcadd(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_u16))) svuint16_t svcadd(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s8))) svint8_t svcadd(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s32))) svint32_t svcadd(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s64))) svint64_t 
svcadd(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcadd_s16))) svint16_t svcadd(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s32))) svint32_t svcdot(svint32_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_s64))) svint64_t svcdot(svint64_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s32))) svint32_t svcdot_lane(svint32_t, svint8_t, svint8_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcdot_lane_s64))) svint64_t svcdot_lane(svint64_t, svint16_t, svint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u8))) svuint8_t svcmla(svuint8_t, svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u32))) svuint32_t svcmla(svuint32_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u64))) svuint64_t svcmla(svuint64_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_u16))) svuint16_t svcmla(svuint16_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s8))) svint8_t svcmla(svint8_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s32))) svint32_t svcmla(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s64))) svint64_t svcmla(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_s16))) svint16_t svcmla(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u32))) svuint32_t svcmla_lane(svuint32_t, svuint32_t, svuint32_t, uint64_t, uint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_u16))) svuint16_t svcmla_lane(svuint16_t, svuint16_t, svuint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s32))) svint32_t svcmla_lane(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcmla_lane_s16))) svint16_t svcmla_lane(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_m))) svfloat32_t svcvtlt_f32_m(svfloat32_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_x))) svfloat32_t svcvtlt_f32_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_m))) svfloat64_t svcvtlt_f64_m(svfloat64_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_x))) svfloat64_t svcvtlt_f64_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_m))) svfloat16_t svcvtnt_f16_m(svfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_m))) svfloat32_t svcvtnt_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_m))) svfloat32_t svcvtx_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_x))) svfloat32_t svcvtx_f32_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtx_f32_f64_z))) svfloat32_t svcvtx_f32_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_m))) svfloat32_t svcvtxnt_f32_m(svfloat32_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u8))) svuint8_t sveor3(svuint8_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u32))) svuint32_t sveor3(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u64))) svuint64_t sveor3(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_u16))) svuint16_t sveor3(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s8))) svint8_t sveor3(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s32))) svint32_t sveor3(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s64))) svint64_t sveor3(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_n_s16))) svint16_t sveor3(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u8))) svuint8_t sveor3(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u32))) svuint32_t sveor3(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u64))) svuint64_t sveor3(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_u16))) svuint16_t sveor3(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s8))) svint8_t sveor3(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s32))) svint32_t sveor3(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s64))) svint64_t sveor3(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveor3_s16))) svint16_t sveor3(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u8))) svuint8_t 
sveorbt(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u32))) svuint32_t sveorbt(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u64))) svuint64_t sveorbt(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_u16))) svuint16_t sveorbt(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s8))) svint8_t sveorbt(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s32))) svint32_t sveorbt(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s64))) svint64_t sveorbt(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_n_s16))) svint16_t sveorbt(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u8))) svuint8_t sveorbt(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u32))) svuint32_t sveorbt(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u64))) svuint64_t sveorbt(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_u16))) svuint16_t sveorbt(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s8))) svint8_t sveorbt(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s32))) svint32_t sveorbt(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s64))) svint64_t sveorbt(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorbt_s16))) svint16_t sveorbt(svint16_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u8))) svuint8_t sveortb(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u32))) svuint32_t sveortb(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u64))) svuint64_t sveortb(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_u16))) svuint16_t sveortb(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s8))) svint8_t sveortb(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s32))) svint32_t sveortb(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s64))) svint64_t sveortb(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_n_s16))) svint16_t sveortb(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u8))) svuint8_t sveortb(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u32))) svuint32_t sveortb(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u64))) svuint64_t sveortb(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_u16))) svuint16_t sveortb(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s8))) svint8_t sveortb(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s32))) svint32_t sveortb(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s64))) svint64_t sveortb(svint64_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveortb_s16))) svint16_t sveortb(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_m))) svint8_t svhadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_m))) svint32_t svhadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_m))) svint64_t svhadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_m))) svint16_t svhadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_x))) svint8_t svhadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_x))) svint32_t svhadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_x))) svint64_t svhadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_x))) svint16_t svhadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s8_z))) svint8_t svhadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s32_z))) svint32_t svhadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s64_z))) svint64_t svhadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_s16_z))) svint16_t svhadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_m))) svuint8_t svhadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_m))) svuint32_t svhadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_m))) 
svuint64_t svhadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_m))) svuint16_t svhadd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_x))) svuint8_t svhadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_x))) svuint32_t svhadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_x))) svuint64_t svhadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_x))) svuint16_t svhadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u8_z))) svuint8_t svhadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u32_z))) svuint32_t svhadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u64_z))) svuint64_t svhadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_n_u16_z))) svuint16_t svhadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_m))) svint8_t svhadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_m))) svint32_t svhadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_m))) svint64_t svhadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_m))) svint16_t svhadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_x))) svint8_t svhadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_x))) svint32_t svhadd_x(svbool_t, svint32_t, svint32_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_x))) svint64_t svhadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_x))) svint16_t svhadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s8_z))) svint8_t svhadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s32_z))) svint32_t svhadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s64_z))) svint64_t svhadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_s16_z))) svint16_t svhadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_m))) svuint8_t svhadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_m))) svuint32_t svhadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_m))) svuint64_t svhadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_m))) svuint16_t svhadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_x))) svuint8_t svhadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_x))) svuint32_t svhadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_x))) svuint64_t svhadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_x))) svuint16_t svhadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u8_z))) svuint8_t svhadd_z(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u32_z))) svuint32_t svhadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u64_z))) svuint64_t svhadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhadd_u16_z))) svuint16_t svhadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) svuint32_t svhistcnt_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) svuint64_t svhistcnt_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z))) svuint32_t svhistcnt_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z))) svuint64_t svhistcnt_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8))) svuint8_t svhistseg(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8))) svuint8_t svhistseg(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_m))) svint8_t svhsub_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_m))) svint32_t svhsub_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_m))) svint64_t svhsub_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_m))) svint16_t svhsub_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_x))) svint8_t svhsub_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_x))) svint32_t svhsub_x(svbool_t, svint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_x))) svint64_t svhsub_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_x))) svint16_t svhsub_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s8_z))) svint8_t svhsub_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s32_z))) svint32_t svhsub_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s64_z))) svint64_t svhsub_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_s16_z))) svint16_t svhsub_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_m))) svuint8_t svhsub_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_m))) svuint32_t svhsub_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_m))) svuint64_t svhsub_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_m))) svuint16_t svhsub_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_x))) svuint8_t svhsub_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_x))) svuint32_t svhsub_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_x))) svuint64_t svhsub_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_x))) svuint16_t svhsub_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u8_z))) svuint8_t svhsub_z(svbool_t, svuint8_t, uint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u32_z))) svuint32_t svhsub_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u64_z))) svuint64_t svhsub_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_n_u16_z))) svuint16_t svhsub_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_m))) svint8_t svhsub_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_m))) svint32_t svhsub_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_m))) svint64_t svhsub_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_m))) svint16_t svhsub_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_x))) svint8_t svhsub_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_x))) svint32_t svhsub_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_x))) svint64_t svhsub_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_x))) svint16_t svhsub_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s8_z))) svint8_t svhsub_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s32_z))) svint32_t svhsub_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s64_z))) svint64_t svhsub_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_s16_z))) svint16_t svhsub_z(svbool_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_m))) svuint8_t svhsub_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_m))) svuint32_t svhsub_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_m))) svuint64_t svhsub_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_m))) svuint16_t svhsub_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_x))) svuint8_t svhsub_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_x))) svuint32_t svhsub_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_x))) svuint64_t svhsub_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_x))) svuint16_t svhsub_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u8_z))) svuint8_t svhsub_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u32_z))) svuint32_t svhsub_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u64_z))) svuint64_t svhsub_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsub_u16_z))) svuint16_t svhsub_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_m))) svint8_t svhsubr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_m))) svint32_t svhsubr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_m))) svint64_t svhsubr_m(svbool_t, svint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_m))) svint16_t svhsubr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_x))) svint8_t svhsubr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_x))) svint32_t svhsubr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_x))) svint64_t svhsubr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_x))) svint16_t svhsubr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s8_z))) svint8_t svhsubr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s32_z))) svint32_t svhsubr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s64_z))) svint64_t svhsubr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_s16_z))) svint16_t svhsubr_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_m))) svuint8_t svhsubr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_m))) svuint32_t svhsubr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_m))) svuint64_t svhsubr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_m))) svuint16_t svhsubr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_x))) svuint8_t svhsubr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_x))) svuint32_t svhsubr_x(svbool_t, svuint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_x))) svuint64_t svhsubr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_x))) svuint16_t svhsubr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u8_z))) svuint8_t svhsubr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u32_z))) svuint32_t svhsubr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u64_z))) svuint64_t svhsubr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_n_u16_z))) svuint16_t svhsubr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_m))) svint8_t svhsubr_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_m))) svint32_t svhsubr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_m))) svint64_t svhsubr_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_m))) svint16_t svhsubr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_x))) svint8_t svhsubr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_x))) svint32_t svhsubr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_x))) svint64_t svhsubr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_x))) svint16_t svhsubr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s8_z))) svint8_t svhsubr_z(svbool_t, svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s32_z))) svint32_t svhsubr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s64_z))) svint64_t svhsubr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_s16_z))) svint16_t svhsubr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_m))) svuint8_t svhsubr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_m))) svuint32_t svhsubr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_m))) svuint64_t svhsubr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_m))) svuint16_t svhsubr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_x))) svuint8_t svhsubr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_x))) svuint32_t svhsubr_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_x))) svuint64_t svhsubr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_x))) svuint16_t svhsubr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u8_z))) svuint8_t svhsubr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u32_z))) svuint32_t svhsubr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u64_z))) svuint64_t svhsubr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhsubr_u16_z))) svuint16_t svhsubr_z(svbool_t, svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32))) svuint32_t svldnt1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64))) svuint64_t svldnt1_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64))) svfloat64_t svldnt1_gather_index_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32))) svfloat32_t svldnt1_gather_index_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32))) svint32_t svldnt1_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64))) svint64_t svldnt1_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32))) svuint32_t svldnt1_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64))) svuint64_t svldnt1_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64))) svfloat64_t svldnt1_gather_offset_f64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32))) svfloat32_t svldnt1_gather_offset_f32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32))) svint32_t svldnt1_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64))) svint64_t svldnt1_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32))) svuint32_t svldnt1_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64))) svuint64_t svldnt1_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64))) svfloat64_t svldnt1_gather_f64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32))) svfloat32_t svldnt1_gather_f32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32))) svint32_t svldnt1_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64))) svint64_t svldnt1_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64))) svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64))) svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64))) svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64))) svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64))) svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64))) svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32))) svuint32_t svldnt1_gather_offset(svbool_t, uint32_t const *, 
svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32))) svfloat32_t svldnt1_gather_offset(svbool_t, float32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32))) svint32_t svldnt1_gather_offset(svbool_t, int32_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64))) svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64))) svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64))) svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64))) svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64))) svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64))) svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32))) svuint32_t svldnt1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64))) svuint64_t svldnt1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32))) svint32_t svldnt1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64))) svint64_t 
svldnt1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32))) svuint32_t svldnt1sb_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64))) svuint64_t svldnt1sb_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32))) svint32_t svldnt1sb_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64))) svint64_t svldnt1sb_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32))) svuint32_t svldnt1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32))) svint32_t svldnt1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64))) svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64))) svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64))) svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64))) svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32))) svuint32_t svldnt1sh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64))) svuint64_t svldnt1sh_gather_index_u64(svbool_t, 
svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32))) svint32_t svldnt1sh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64))) svint64_t svldnt1sh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32))) svuint32_t svldnt1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64))) svuint64_t svldnt1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32))) svint32_t svldnt1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64))) svint64_t svldnt1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32))) svuint32_t svldnt1sh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64))) svuint64_t svldnt1sh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32))) svint32_t svldnt1sh_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64))) svint64_t svldnt1sh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64))) svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64))) svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64))) svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64))) svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32))) svuint32_t svldnt1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32))) svint32_t svldnt1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64))) svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64))) svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64))) svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64))) svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64))) svuint64_t svldnt1sw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64))) svint64_t svldnt1sw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64))) svuint64_t svldnt1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64))) svint64_t 
svldnt1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64))) svuint64_t svldnt1sw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64))) svint64_t svldnt1sw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64))) svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64))) svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64))) svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64))) svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64))) svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64))) svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64))) svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64))) svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32))) svuint32_t svldnt1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64))) svuint64_t svldnt1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32))) svint32_t svldnt1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64))) svint64_t svldnt1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32))) svuint32_t svldnt1ub_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64))) svuint64_t svldnt1ub_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32))) svint32_t svldnt1ub_gather_s32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64))) svint64_t svldnt1ub_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32))) svuint32_t svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32))) svint32_t svldnt1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64))) svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64))) svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64))) svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64))) svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32))) svuint32_t svldnt1uh_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64))) svuint64_t svldnt1uh_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32))) svint32_t svldnt1uh_gather_index_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64))) svint64_t svldnt1uh_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32))) svuint32_t svldnt1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64))) svuint64_t svldnt1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32))) svint32_t svldnt1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64))) svint64_t svldnt1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32))) svuint32_t svldnt1uh_gather_u32(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64))) svuint64_t svldnt1uh_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32))) svint32_t svldnt1uh_gather_s32(svbool_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64))) svint64_t svldnt1uh_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64))) svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64))) svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64))) svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64))) svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32))) svuint32_t svldnt1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32))) svint32_t svldnt1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64))) svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64))) svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64))) svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64))) svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64))) svuint64_t 
svldnt1uw_gather_index_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64))) svint64_t svldnt1uw_gather_index_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64))) svuint64_t svldnt1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64))) svint64_t svldnt1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64))) svuint64_t svldnt1uw_gather_u64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64))) svint64_t svldnt1uw_gather_s64(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64))) svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64))) svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64))) svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64))) svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64))) svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64))) svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64))) 
svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64))) svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_m))) svint64_t svlogb_m(svint64_t, svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_m))) svint32_t svlogb_m(svint32_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_m))) svint16_t svlogb_m(svint16_t, svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_x))) svint64_t svlogb_x(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_x))) svint32_t svlogb_x(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_x))) svint16_t svlogb_x(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f64_z))) svint64_t svlogb_z(svbool_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f32_z))) svint32_t svlogb_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlogb_f16_z))) svint16_t svlogb_z(svbool_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8))) svbool_t svmatch(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16))) svbool_t svmatch(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8))) svbool_t svmatch(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16))) svbool_t svmatch(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_m))) svfloat64_t svmaxnmp_m(svbool_t, svfloat64_t, 
svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_m))) svfloat32_t svmaxnmp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_m))) svfloat16_t svmaxnmp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f64_x))) svfloat64_t svmaxnmp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f32_x))) svfloat32_t svmaxnmp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmp_f16_x))) svfloat16_t svmaxnmp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_m))) svfloat64_t svmaxp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_m))) svfloat32_t svmaxp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_m))) svfloat16_t svmaxp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f64_x))) svfloat64_t svmaxp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f32_x))) svfloat32_t svmaxp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_f16_x))) svfloat16_t svmaxp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_m))) svint8_t svmaxp_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_m))) svint32_t svmaxp_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_m))) svint64_t svmaxp_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_m))) svint16_t 
svmaxp_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s8_x))) svint8_t svmaxp_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s32_x))) svint32_t svmaxp_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s64_x))) svint64_t svmaxp_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_s16_x))) svint16_t svmaxp_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_m))) svuint8_t svmaxp_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_m))) svuint32_t svmaxp_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_m))) svuint64_t svmaxp_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_m))) svuint16_t svmaxp_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u8_x))) svuint8_t svmaxp_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u32_x))) svuint32_t svmaxp_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u64_x))) svuint64_t svmaxp_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxp_u16_x))) svuint16_t svmaxp_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_m))) svfloat64_t svminnmp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_m))) svfloat32_t svminnmp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_m))) svfloat16_t svminnmp_m(svbool_t, 
svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f64_x))) svfloat64_t svminnmp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f32_x))) svfloat32_t svminnmp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmp_f16_x))) svfloat16_t svminnmp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_m))) svfloat64_t svminp_m(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_m))) svfloat32_t svminp_m(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_m))) svfloat16_t svminp_m(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f64_x))) svfloat64_t svminp_x(svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f32_x))) svfloat32_t svminp_x(svbool_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_f16_x))) svfloat16_t svminp_x(svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_m))) svint8_t svminp_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_m))) svint32_t svminp_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_m))) svint64_t svminp_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_m))) svint16_t svminp_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s8_x))) svint8_t svminp_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s32_x))) svint32_t svminp_x(svbool_t, 
svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s64_x))) svint64_t svminp_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_s16_x))) svint16_t svminp_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_m))) svuint8_t svminp_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_m))) svuint32_t svminp_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_m))) svuint64_t svminp_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_m))) svuint16_t svminp_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u8_x))) svuint8_t svminp_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u32_x))) svuint32_t svminp_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u64_x))) svuint64_t svminp_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminp_u16_x))) svuint16_t svminp_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u32))) svuint32_t svmla_lane(svuint32_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u64))) svuint64_t svmla_lane(svuint64_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_u16))) svuint16_t svmla_lane(svuint16_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s32))) svint32_t svmla_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s64))) 
svint64_t svmla_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_s16))) svint16_t svmla_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f32))) svfloat32_t svmlalb(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s32))) svint32_t svmlalb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s64))) svint64_t svmlalb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_s16))) svint16_t svmlalb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u32))) svuint32_t svmlalb(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u64))) svuint64_t svmlalb(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_u16))) svuint16_t svmlalb(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f32))) svfloat32_t svmlalb(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s32))) svint32_t svmlalb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s64))) svint64_t svmlalb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_s16))) svint16_t svmlalb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u32))) svuint32_t svmlalb(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u64))) svuint64_t svmlalb(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_u16))) svuint16_t 
svmlalb(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f32))) svfloat32_t svmlalb_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s32))) svint32_t svmlalb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_s64))) svint64_t svmlalb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u32))) svuint32_t svmlalb_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_u64))) svuint64_t svmlalb_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f32))) svfloat32_t svmlalt(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s32))) svint32_t svmlalt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s64))) svint64_t svmlalt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_s16))) svint16_t svmlalt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u32))) svuint32_t svmlalt(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u64))) svuint64_t svmlalt(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_u16))) svuint16_t svmlalt(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f32))) svfloat32_t svmlalt(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s32))) svint32_t svmlalt(svint32_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s64))) svint64_t svmlalt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_s16))) svint16_t svmlalt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u32))) svuint32_t svmlalt(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u64))) svuint64_t svmlalt(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_u16))) svuint16_t svmlalt(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f32))) svfloat32_t svmlalt_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s32))) svint32_t svmlalt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_s64))) svint64_t svmlalt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u32))) svuint32_t svmlalt_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_u64))) svuint64_t svmlalt_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u32))) svuint32_t svmls_lane(svuint32_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u64))) svuint64_t svmls_lane(svuint64_t, svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_u16))) svuint16_t svmls_lane(svuint16_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s32))) svint32_t svmls_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s64))) svint64_t svmls_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_s16))) svint16_t svmls_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_f32))) svfloat32_t svmlslb(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s32))) svint32_t svmlslb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s64))) svint64_t svmlslb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_s16))) svint16_t svmlslb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u32))) svuint32_t svmlslb(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u64))) svuint64_t svmlslb(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_n_u16))) svuint16_t svmlslb(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_f32))) svfloat32_t svmlslb(svfloat32_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s32))) svint32_t svmlslb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s64))) svint64_t svmlslb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_s16))) svint16_t svmlslb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u32))) svuint32_t svmlslb(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u64))) svuint64_t svmlslb(svuint64_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_u16))) svuint16_t svmlslb(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_f32))) svfloat32_t svmlslb_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s32))) svint32_t svmlslb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_s64))) svint64_t svmlslb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u32))) svuint32_t svmlslb_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslb_lane_u64))) svuint64_t svmlslb_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_f32))) svfloat32_t svmlslt(svfloat32_t, svfloat16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s32))) svint32_t svmlslt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s64))) svint64_t svmlslt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_s16))) svint16_t svmlslt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u32))) svuint32_t svmlslt(svuint32_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u64))) svuint64_t svmlslt(svuint64_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_n_u16))) svuint16_t svmlslt(svuint16_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_f32))) svfloat32_t svmlslt(svfloat32_t, svfloat16_t, svfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s32))) svint32_t svmlslt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s64))) svint64_t svmlslt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_s16))) svint16_t svmlslt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u32))) svuint32_t svmlslt(svuint32_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u64))) svuint64_t svmlslt(svuint64_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_u16))) svuint16_t svmlslt(svuint16_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_f32))) svfloat32_t svmlslt_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s32))) svint32_t svmlslt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_s64))) svint64_t svmlslt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u32))) svuint32_t svmlslt_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlslt_lane_u64))) svuint64_t svmlslt_lane(svuint64_t, svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s32))) svint32_t svmovlb(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s64))) svint64_t svmovlb(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_s16))) svint16_t svmovlb(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u32))) svuint32_t svmovlb(svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u64))) svuint64_t svmovlb(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlb_u16))) svuint16_t svmovlb(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s32))) svint32_t svmovlt(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s64))) svint64_t svmovlt(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_s16))) svint16_t svmovlt(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u32))) svuint32_t svmovlt(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u64))) svuint64_t svmovlt(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmovlt_u16))) svuint16_t svmovlt(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u32))) svuint32_t svmul_lane(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u64))) svuint64_t svmul_lane(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_u16))) svuint16_t svmul_lane(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s32))) svint32_t svmul_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s64))) svint64_t svmul_lane(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_s16))) svint16_t svmul_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s32))) svint32_t svmullb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s64))) svint64_t svmullb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_s16))) svint16_t svmullb(svint8_t, 
int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u32))) svuint32_t svmullb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u64))) svuint64_t svmullb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_n_u16))) svuint16_t svmullb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s32))) svint32_t svmullb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s64))) svint64_t svmullb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_s16))) svint16_t svmullb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u32))) svuint32_t svmullb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u64))) svuint64_t svmullb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_u16))) svuint16_t svmullb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s32))) svint32_t svmullb_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_s64))) svint64_t svmullb_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u32))) svuint32_t svmullb_lane(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullb_lane_u64))) svuint64_t svmullb_lane(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s32))) svint32_t svmullt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s64))) svint64_t svmullt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_s16))) svint16_t svmullt(svint8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u32))) svuint32_t svmullt(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u64))) svuint64_t svmullt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_n_u16))) svuint16_t svmullt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s32))) svint32_t svmullt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s64))) svint64_t svmullt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_s16))) svint16_t svmullt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u32))) svuint32_t svmullt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u64))) svuint64_t svmullt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_u16))) svuint16_t svmullt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s32))) svint32_t svmullt_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_s64))) svint64_t svmullt_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u32))) svuint32_t svmullt_lane(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmullt_lane_u64))) svuint64_t svmullt_lane(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u8))) svuint8_t svnbsl(svuint8_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u32))) svuint32_t svnbsl(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u64))) svuint64_t svnbsl(svuint64_t, svuint64_t, uint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_u16))) svuint16_t svnbsl(svuint16_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s8))) svint8_t svnbsl(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s32))) svint32_t svnbsl(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s64))) svint64_t svnbsl(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_n_s16))) svint16_t svnbsl(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u8))) svuint8_t svnbsl(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u32))) svuint32_t svnbsl(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u64))) svuint64_t svnbsl(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_u16))) svuint16_t svnbsl(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s8))) svint8_t svnbsl(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s32))) svint32_t svnbsl(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s64))) svint64_t svnbsl(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnbsl_s16))) svint16_t svnbsl(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8))) svbool_t svnmatch(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16))) svbool_t svnmatch(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8))) svbool_t 
svnmatch(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16))) svbool_t svnmatch(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_n_u8))) svuint8_t svpmul(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmul_u8))) svuint8_t svpmul(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u64))) svuint64_t svpmullb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_n_u16))) svuint16_t svpmullb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u64))) svuint64_t svpmullb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_u16))) svuint16_t svpmullb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u8))) svuint8_t svpmullb_pair(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u32))) svuint32_t svpmullb_pair(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u8))) svuint8_t svpmullb_pair(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u32))) svuint32_t svpmullb_pair(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u64))) svuint64_t svpmullt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_n_u16))) svuint16_t svpmullt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u64))) svuint64_t svpmullt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_u16))) svuint16_t svpmullt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u8))) svuint8_t 
svpmullt_pair(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u32))) svuint32_t svpmullt_pair(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u8))) svuint8_t svpmullt_pair(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u32))) svuint32_t svpmullt_pair(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_m))) svint8_t svqabs_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_m))) svint32_t svqabs_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_m))) svint64_t svqabs_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_m))) svint16_t svqabs_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_x))) svint8_t svqabs_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_x))) svint32_t svqabs_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_x))) svint64_t svqabs_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_x))) svint16_t svqabs_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s8_z))) svint8_t svqabs_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s32_z))) svint32_t svqabs_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s64_z))) svint64_t svqabs_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqabs_s16_z))) svint16_t svqabs_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_m))) svint8_t svqadd_m(svbool_t, svint8_t, 
int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_m))) svint32_t svqadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_m))) svint64_t svqadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_m))) svint16_t svqadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_x))) svint8_t svqadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_x))) svint32_t svqadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_x))) svint64_t svqadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_x))) svint16_t svqadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8_z))) svint8_t svqadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32_z))) svint32_t svqadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s64_z))) svint64_t svqadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s16_z))) svint16_t svqadd_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_m))) svuint8_t svqadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_m))) svuint32_t svqadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_m))) svuint64_t svqadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_m))) svuint16_t svqadd_m(svbool_t, svuint16_t, uint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_x))) svuint8_t svqadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_x))) svuint32_t svqadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_x))) svuint64_t svqadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_x))) svuint16_t svqadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u8_z))) svuint8_t svqadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u32_z))) svuint32_t svqadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u64_z))) svuint64_t svqadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_u16_z))) svuint16_t svqadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_m))) svint8_t svqadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_m))) svint32_t svqadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_m))) svint64_t svqadd_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_m))) svint16_t svqadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_x))) svint8_t svqadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_x))) svint32_t svqadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_x))) svint64_t svqadd_x(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_x))) svint16_t svqadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s8_z))) svint8_t svqadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s32_z))) svint32_t svqadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s64_z))) svint64_t svqadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_s16_z))) svint16_t svqadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_m))) svuint8_t svqadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_m))) svuint32_t svqadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_m))) svuint64_t svqadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_m))) svuint16_t svqadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_x))) svuint8_t svqadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_x))) svuint32_t svqadd_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_x))) svuint64_t svqadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_x))) svuint16_t svqadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u8_z))) svuint8_t svqadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u32_z))) svuint32_t svqadd_z(svbool_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u64_z))) svuint64_t svqadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_u16_z))) svuint16_t svqadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s8))) svint8_t svqcadd(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s32))) svint32_t svqcadd(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s64))) svint64_t svqcadd(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcadd_s16))) svint16_t svqcadd(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s32))) svint32_t svqdmlalb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s64))) svint64_t svqdmlalb(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_n_s16))) svint16_t svqdmlalb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s32))) svint32_t svqdmlalb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s64))) svint64_t svqdmlalb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_s16))) svint16_t svqdmlalb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s32))) svint32_t svqdmlalb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalb_lane_s64))) svint64_t svqdmlalb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s32))) svint32_t svqdmlalbt(svint32_t, svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s64))) svint64_t svqdmlalbt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_n_s16))) svint16_t svqdmlalbt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s32))) svint32_t svqdmlalbt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s64))) svint64_t svqdmlalbt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalbt_s16))) svint16_t svqdmlalbt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s32))) svint32_t svqdmlalt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s64))) svint64_t svqdmlalt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_n_s16))) svint16_t svqdmlalt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s32))) svint32_t svqdmlalt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s64))) svint64_t svqdmlalt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_s16))) svint16_t svqdmlalt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s32))) svint32_t svqdmlalt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlalt_lane_s64))) svint64_t svqdmlalt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s32))) svint32_t svqdmlslb(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s64))) svint64_t svqdmlslb(svint64_t, 
svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_n_s16))) svint16_t svqdmlslb(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s32))) svint32_t svqdmlslb(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s64))) svint64_t svqdmlslb(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_s16))) svint16_t svqdmlslb(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s32))) svint32_t svqdmlslb_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslb_lane_s64))) svint64_t svqdmlslb_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s32))) svint32_t svqdmlslbt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s64))) svint64_t svqdmlslbt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_n_s16))) svint16_t svqdmlslbt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s32))) svint32_t svqdmlslbt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s64))) svint64_t svqdmlslbt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslbt_s16))) svint16_t svqdmlslbt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s32))) svint32_t svqdmlslt(svint32_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s64))) svint64_t svqdmlslt(svint64_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_n_s16))) 
svint16_t svqdmlslt(svint16_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s32))) svint32_t svqdmlslt(svint32_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s64))) svint64_t svqdmlslt(svint64_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_s16))) svint16_t svqdmlslt(svint16_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s32))) svint32_t svqdmlslt_lane(svint32_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmlslt_lane_s64))) svint64_t svqdmlslt_lane(svint64_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s8))) svint8_t svqdmulh(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s32))) svint32_t svqdmulh(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s64))) svint64_t svqdmulh(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_n_s16))) svint16_t svqdmulh(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s8))) svint8_t svqdmulh(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s32))) svint32_t svqdmulh(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s64))) svint64_t svqdmulh(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_s16))) svint16_t svqdmulh(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s32))) svint32_t svqdmulh_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s64))) svint64_t svqdmulh_lane(svint64_t, svint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmulh_lane_s16))) svint16_t svqdmulh_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s32))) svint32_t svqdmullb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s64))) svint64_t svqdmullb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_n_s16))) svint16_t svqdmullb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s32))) svint32_t svqdmullb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s64))) svint64_t svqdmullb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_s16))) svint16_t svqdmullb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s32))) svint32_t svqdmullb_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullb_lane_s64))) svint64_t svqdmullb_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s32))) svint32_t svqdmullt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s64))) svint64_t svqdmullt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_n_s16))) svint16_t svqdmullt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s32))) svint32_t svqdmullt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s64))) svint64_t svqdmullt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_s16))) svint16_t svqdmullt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s32))) svint32_t svqdmullt_lane(svint16_t, svint16_t, uint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdmullt_lane_s64))) svint64_t svqdmullt_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_m))) svint8_t svqneg_m(svint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_m))) svint32_t svqneg_m(svint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_m))) svint64_t svqneg_m(svint64_t, svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_m))) svint16_t svqneg_m(svint16_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_x))) svint8_t svqneg_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_x))) svint32_t svqneg_x(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_x))) svint64_t svqneg_x(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_x))) svint16_t svqneg_x(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s8_z))) svint8_t svqneg_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s32_z))) svint32_t svqneg_z(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s64_z))) svint64_t svqneg_z(svbool_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqneg_s16_z))) svint16_t svqneg_z(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s8))) svint8_t svqrdcmlah(svint8_t, svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s32))) svint32_t svqrdcmlah(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s64))) svint64_t svqrdcmlah(svint64_t, svint64_t, 
svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_s16))) svint16_t svqrdcmlah(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s32))) svint32_t svqrdcmlah_lane(svint32_t, svint32_t, svint32_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdcmlah_lane_s16))) svint16_t svqrdcmlah_lane(svint16_t, svint16_t, svint16_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s8))) svint8_t svqrdmlah(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s32))) svint32_t svqrdmlah(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s64))) svint64_t svqrdmlah(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_n_s16))) svint16_t svqrdmlah(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s8))) svint8_t svqrdmlah(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s32))) svint32_t svqrdmlah(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s64))) svint64_t svqrdmlah(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_s16))) svint16_t svqrdmlah(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s32))) svint32_t svqrdmlah_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s64))) svint64_t svqrdmlah_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlah_lane_s16))) svint16_t svqrdmlah_lane(svint16_t, svint16_t, svint16_t, uint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s8))) svint8_t svqrdmlsh(svint8_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s32))) svint32_t svqrdmlsh(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s64))) svint64_t svqrdmlsh(svint64_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_n_s16))) svint16_t svqrdmlsh(svint16_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s8))) svint8_t svqrdmlsh(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s32))) svint32_t svqrdmlsh(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s64))) svint64_t svqrdmlsh(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_s16))) svint16_t svqrdmlsh(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s32))) svint32_t svqrdmlsh_lane(svint32_t, svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s64))) svint64_t svqrdmlsh_lane(svint64_t, svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmlsh_lane_s16))) svint16_t svqrdmlsh_lane(svint16_t, svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s8))) svint8_t svqrdmulh(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s32))) svint32_t svqrdmulh(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s64))) svint64_t svqrdmulh(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_n_s16))) svint16_t svqrdmulh(svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s8))) svint8_t svqrdmulh(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s32))) svint32_t svqrdmulh(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s64))) svint64_t svqrdmulh(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_s16))) svint16_t svqrdmulh(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s32))) svint32_t svqrdmulh_lane(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s64))) svint64_t svqrdmulh_lane(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrdmulh_lane_s16))) svint16_t svqrdmulh_lane(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_m))) svint8_t svqrshl_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_m))) svint32_t svqrshl_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_m))) svint64_t svqrshl_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_m))) svint16_t svqrshl_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_x))) svint8_t svqrshl_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_x))) svint32_t svqrshl_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_x))) svint64_t svqrshl_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_x))) svint16_t svqrshl_x(svbool_t, svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s8_z))) svint8_t svqrshl_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s32_z))) svint32_t svqrshl_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s64_z))) svint64_t svqrshl_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_s16_z))) svint16_t svqrshl_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_m))) svuint8_t svqrshl_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_m))) svuint32_t svqrshl_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_m))) svuint64_t svqrshl_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_m))) svuint16_t svqrshl_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_x))) svuint8_t svqrshl_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_x))) svuint32_t svqrshl_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_x))) svuint64_t svqrshl_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_x))) svuint16_t svqrshl_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u8_z))) svuint8_t svqrshl_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u32_z))) svuint32_t svqrshl_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u64_z))) svuint64_t svqrshl_z(svbool_t, svuint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_n_u16_z))) svuint16_t svqrshl_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_m))) svint8_t svqrshl_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_m))) svint32_t svqrshl_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_m))) svint64_t svqrshl_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_m))) svint16_t svqrshl_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_x))) svint8_t svqrshl_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_x))) svint32_t svqrshl_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_x))) svint64_t svqrshl_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_x))) svint16_t svqrshl_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s8_z))) svint8_t svqrshl_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s32_z))) svint32_t svqrshl_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s64_z))) svint64_t svqrshl_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_s16_z))) svint16_t svqrshl_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_m))) svuint8_t svqrshl_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_m))) svuint32_t svqrshl_m(svbool_t, svuint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_m))) svuint64_t svqrshl_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_m))) svuint16_t svqrshl_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_x))) svuint8_t svqrshl_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_x))) svuint32_t svqrshl_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_x))) svuint64_t svqrshl_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_x))) svuint16_t svqrshl_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u8_z))) svuint8_t svqrshl_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u32_z))) svuint32_t svqrshl_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u64_z))) svuint64_t svqrshl_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshl_u16_z))) svuint16_t svqrshl_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s32))) svint16_t svqrshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s64))) svint32_t svqrshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_s16))) svint8_t svqrshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u32))) svuint16_t svqrshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u64))) svuint32_t svqrshrnb(svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnb_n_u16))) svuint8_t svqrshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s32))) svint16_t svqrshrnt(svint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s64))) svint32_t svqrshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_s16))) svint8_t svqrshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u32))) svuint16_t svqrshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u64))) svuint32_t svqrshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrnt_n_u16))) svuint8_t svqrshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s32))) svuint16_t svqrshrunb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s64))) svuint32_t svqrshrunb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunb_n_s16))) svuint8_t svqrshrunb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s32))) svuint16_t svqrshrunt(svuint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s64))) svuint32_t svqrshrunt(svuint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrunt_n_s16))) svuint8_t svqrshrunt(svuint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_m))) svint8_t svqshl_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_m))) svint32_t svqshl_m(svbool_t, svint32_t, int32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_m))) svint64_t svqshl_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_m))) svint16_t svqshl_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_x))) svint8_t svqshl_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_x))) svint32_t svqshl_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_x))) svint64_t svqshl_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_x))) svint16_t svqshl_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s8_z))) svint8_t svqshl_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s32_z))) svint32_t svqshl_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s64_z))) svint64_t svqshl_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_s16_z))) svint16_t svqshl_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_m))) svuint8_t svqshl_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_m))) svuint32_t svqshl_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_m))) svuint64_t svqshl_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_m))) svuint16_t svqshl_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_x))) svuint8_t svqshl_x(svbool_t, svuint8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_x))) svuint32_t svqshl_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_x))) svuint64_t svqshl_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_x))) svuint16_t svqshl_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u8_z))) svuint8_t svqshl_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u32_z))) svuint32_t svqshl_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u64_z))) svuint64_t svqshl_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_n_u16_z))) svuint16_t svqshl_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_m))) svint8_t svqshl_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_m))) svint32_t svqshl_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_m))) svint64_t svqshl_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_m))) svint16_t svqshl_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_x))) svint8_t svqshl_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_x))) svint32_t svqshl_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_x))) svint64_t svqshl_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_x))) svint16_t svqshl_x(svbool_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s8_z))) svint8_t svqshl_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s32_z))) svint32_t svqshl_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s64_z))) svint64_t svqshl_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_s16_z))) svint16_t svqshl_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_m))) svuint8_t svqshl_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_m))) svuint32_t svqshl_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_m))) svuint64_t svqshl_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_m))) svuint16_t svqshl_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_x))) svuint8_t svqshl_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_x))) svuint32_t svqshl_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_x))) svuint64_t svqshl_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_x))) svuint16_t svqshl_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u8_z))) svuint8_t svqshl_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u32_z))) svuint32_t svqshl_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u64_z))) svuint64_t svqshl_z(svbool_t, svuint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshl_u16_z))) svuint16_t svqshl_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_m))) svuint8_t svqshlu_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_m))) svuint32_t svqshlu_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_m))) svuint64_t svqshlu_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_m))) svuint16_t svqshlu_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_x))) svuint8_t svqshlu_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_x))) svuint32_t svqshlu_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_x))) svuint64_t svqshlu_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_x))) svuint16_t svqshlu_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s8_z))) svuint8_t svqshlu_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s32_z))) svuint32_t svqshlu_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s64_z))) svuint64_t svqshlu_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshlu_n_s16_z))) svuint16_t svqshlu_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s32))) svint16_t svqshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s64))) svint32_t svqshrnb(svint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_s16))) svint8_t svqshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u32))) svuint16_t svqshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u64))) svuint32_t svqshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnb_n_u16))) svuint8_t svqshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s32))) svint16_t svqshrnt(svint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s64))) svint32_t svqshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_s16))) svint8_t svqshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u32))) svuint16_t svqshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u64))) svuint32_t svqshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrnt_n_u16))) svuint8_t svqshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s32))) svuint16_t svqshrunb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s64))) svuint32_t svqshrunb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunb_n_s16))) svuint8_t svqshrunb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s32))) svuint16_t svqshrunt(svuint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s64))) svuint32_t svqshrunt(svuint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqshrunt_n_s16))) svuint8_t 
svqshrunt(svuint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_m))) svint8_t svqsub_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_m))) svint32_t svqsub_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_m))) svint64_t svqsub_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_m))) svint16_t svqsub_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_x))) svint8_t svqsub_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_x))) svint32_t svqsub_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_x))) svint64_t svqsub_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_x))) svint16_t svqsub_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8_z))) svint8_t svqsub_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32_z))) svint32_t svqsub_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s64_z))) svint64_t svqsub_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s16_z))) svint16_t svqsub_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_m))) svuint8_t svqsub_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_m))) svuint32_t svqsub_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_m))) svuint64_t svqsub_m(svbool_t, svuint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_m))) svuint16_t svqsub_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_x))) svuint8_t svqsub_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_x))) svuint32_t svqsub_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_x))) svuint64_t svqsub_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_x))) svuint16_t svqsub_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u8_z))) svuint8_t svqsub_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u32_z))) svuint32_t svqsub_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u64_z))) svuint64_t svqsub_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_u16_z))) svuint16_t svqsub_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_m))) svint8_t svqsub_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_m))) svint32_t svqsub_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_m))) svint64_t svqsub_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_m))) svint16_t svqsub_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_x))) svint8_t svqsub_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_x))) svint32_t svqsub_x(svbool_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_x))) svint64_t svqsub_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_x))) svint16_t svqsub_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s8_z))) svint8_t svqsub_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s32_z))) svint32_t svqsub_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s64_z))) svint64_t svqsub_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_s16_z))) svint16_t svqsub_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_m))) svuint8_t svqsub_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_m))) svuint32_t svqsub_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_m))) svuint64_t svqsub_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_m))) svuint16_t svqsub_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_x))) svuint8_t svqsub_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_x))) svuint32_t svqsub_x(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_x))) svuint64_t svqsub_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_x))) svuint16_t svqsub_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u8_z))) svuint8_t svqsub_z(svbool_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u32_z))) svuint32_t svqsub_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u64_z))) svuint64_t svqsub_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_u16_z))) svuint16_t svqsub_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_m))) svint8_t svqsubr_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_m))) svint32_t svqsubr_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_m))) svint64_t svqsubr_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_m))) svint16_t svqsubr_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_x))) svint8_t svqsubr_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_x))) svint32_t svqsubr_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_x))) svint64_t svqsubr_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_x))) svint16_t svqsubr_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s8_z))) svint8_t svqsubr_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s32_z))) svint32_t svqsubr_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s64_z))) svint64_t svqsubr_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_s16_z))) svint16_t svqsubr_z(svbool_t, svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_m))) svuint8_t svqsubr_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_m))) svuint32_t svqsubr_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_m))) svuint64_t svqsubr_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_m))) svuint16_t svqsubr_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_x))) svuint8_t svqsubr_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_x))) svuint32_t svqsubr_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_x))) svuint64_t svqsubr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_x))) svuint16_t svqsubr_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u8_z))) svuint8_t svqsubr_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u32_z))) svuint32_t svqsubr_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u64_z))) svuint64_t svqsubr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_n_u16_z))) svuint16_t svqsubr_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_m))) svint8_t svqsubr_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_m))) svint32_t svqsubr_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_m))) svint64_t svqsubr_m(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_m))) svint16_t svqsubr_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_x))) svint8_t svqsubr_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_x))) svint32_t svqsubr_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_x))) svint64_t svqsubr_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_x))) svint16_t svqsubr_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s8_z))) svint8_t svqsubr_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s32_z))) svint32_t svqsubr_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s64_z))) svint64_t svqsubr_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_s16_z))) svint16_t svqsubr_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_m))) svuint8_t svqsubr_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_m))) svuint32_t svqsubr_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_m))) svuint64_t svqsubr_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_m))) svuint16_t svqsubr_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_x))) svuint8_t svqsubr_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_x))) svuint32_t svqsubr_x(svbool_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_x))) svuint64_t svqsubr_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_x))) svuint16_t svqsubr_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u8_z))) svuint8_t svqsubr_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u32_z))) svuint32_t svqsubr_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u64_z))) svuint64_t svqsubr_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsubr_u16_z))) svuint16_t svqsubr_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s32))) svint16_t svqxtnb(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s64))) svint32_t svqxtnb(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_s16))) svint8_t svqxtnb(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u32))) svuint16_t svqxtnb(svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u64))) svuint32_t svqxtnb(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnb_u16))) svuint8_t svqxtnb(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s32))) svint16_t svqxtnt(svint16_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s64))) svint32_t svqxtnt(svint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_s16))) svint8_t svqxtnt(svint8_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u32))) svuint16_t svqxtnt(svuint16_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u64))) svuint32_t 
svqxtnt(svuint32_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtnt_u16))) svuint8_t svqxtnt(svuint8_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s32))) svuint16_t svqxtunb(svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s64))) svuint32_t svqxtunb(svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunb_s16))) svuint8_t svqxtunb(svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s32))) svuint16_t svqxtunt(svuint16_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s64))) svuint32_t svqxtunt(svuint32_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqxtunt_s16))) svuint8_t svqxtunt(svuint8_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u32))) svuint16_t svraddhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u64))) svuint32_t svraddhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_u16))) svuint8_t svraddhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s32))) svint16_t svraddhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s64))) svint32_t svraddhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_n_s16))) svint8_t svraddhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u32))) svuint16_t svraddhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u64))) svuint32_t svraddhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_u16))) svuint8_t svraddhnb(svuint16_t, svuint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s32))) svint16_t svraddhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s64))) svint32_t svraddhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnb_s16))) svint8_t svraddhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u32))) svuint16_t svraddhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u64))) svuint32_t svraddhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_u16))) svuint8_t svraddhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s32))) svint16_t svraddhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s64))) svint32_t svraddhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_n_s16))) svint8_t svraddhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u32))) svuint16_t svraddhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u64))) svuint32_t svraddhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_u16))) svuint8_t svraddhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s32))) svint16_t svraddhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s64))) svint32_t svraddhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svraddhnt_s16))) svint8_t svraddhnt(svint8_t, svint16_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_m))) svuint32_t svrecpe_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_x))) svuint32_t svrecpe_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrecpe_u32_z))) svuint32_t svrecpe_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_m))) svint8_t svrhadd_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_m))) svint32_t svrhadd_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_m))) svint64_t svrhadd_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_m))) svint16_t svrhadd_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_x))) svint8_t svrhadd_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_x))) svint32_t svrhadd_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_x))) svint64_t svrhadd_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_x))) svint16_t svrhadd_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s8_z))) svint8_t svrhadd_z(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s32_z))) svint32_t svrhadd_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s64_z))) svint64_t svrhadd_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_s16_z))) svint16_t svrhadd_z(svbool_t, svint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_m))) svuint8_t svrhadd_m(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_m))) svuint32_t svrhadd_m(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_m))) svuint64_t svrhadd_m(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_m))) svuint16_t svrhadd_m(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_x))) svuint8_t svrhadd_x(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_x))) svuint32_t svrhadd_x(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_x))) svuint64_t svrhadd_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_x))) svuint16_t svrhadd_x(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u8_z))) svuint8_t svrhadd_z(svbool_t, svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u32_z))) svuint32_t svrhadd_z(svbool_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u64_z))) svuint64_t svrhadd_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_n_u16_z))) svuint16_t svrhadd_z(svbool_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_m))) svint8_t svrhadd_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_m))) svint32_t svrhadd_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_m))) svint64_t svrhadd_m(svbool_t, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_m))) svint16_t svrhadd_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_x))) svint8_t svrhadd_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_x))) svint32_t svrhadd_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_x))) svint64_t svrhadd_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_x))) svint16_t svrhadd_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s8_z))) svint8_t svrhadd_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s32_z))) svint32_t svrhadd_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s64_z))) svint64_t svrhadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_s16_z))) svint16_t svrhadd_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_m))) svuint8_t svrhadd_m(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_m))) svuint32_t svrhadd_m(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_m))) svuint64_t svrhadd_m(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_m))) svuint16_t svrhadd_m(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_x))) svuint8_t svrhadd_x(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_x))) svuint32_t svrhadd_x(svbool_t, svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_x))) svuint64_t svrhadd_x(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_x))) svuint16_t svrhadd_x(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u8_z))) svuint8_t svrhadd_z(svbool_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u32_z))) svuint32_t svrhadd_z(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u64_z))) svuint64_t svrhadd_z(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrhadd_u16_z))) svuint16_t svrhadd_z(svbool_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_m))) svint8_t svrshl_m(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_m))) svint32_t svrshl_m(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_m))) svint64_t svrshl_m(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_m))) svint16_t svrshl_m(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_x))) svint8_t svrshl_x(svbool_t, svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_x))) svint32_t svrshl_x(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_x))) svint64_t svrshl_x(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_x))) svint16_t svrshl_x(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s8_z))) svint8_t svrshl_z(svbool_t, svint8_t, int8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s32_z))) svint32_t svrshl_z(svbool_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s64_z))) svint64_t svrshl_z(svbool_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_s16_z))) svint16_t svrshl_z(svbool_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_m))) svuint8_t svrshl_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_m))) svuint32_t svrshl_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_m))) svuint64_t svrshl_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_m))) svuint16_t svrshl_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_x))) svuint8_t svrshl_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_x))) svuint32_t svrshl_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_x))) svuint64_t svrshl_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_x))) svuint16_t svrshl_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u8_z))) svuint8_t svrshl_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u32_z))) svuint32_t svrshl_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u64_z))) svuint64_t svrshl_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_n_u16_z))) svuint16_t svrshl_z(svbool_t, svuint16_t, int16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_m))) svint8_t svrshl_m(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_m))) svint32_t svrshl_m(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_m))) svint64_t svrshl_m(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_m))) svint16_t svrshl_m(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_x))) svint8_t svrshl_x(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_x))) svint32_t svrshl_x(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_x))) svint64_t svrshl_x(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_x))) svint16_t svrshl_x(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s8_z))) svint8_t svrshl_z(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s32_z))) svint32_t svrshl_z(svbool_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s64_z))) svint64_t svrshl_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_s16_z))) svint16_t svrshl_z(svbool_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_m))) svuint8_t svrshl_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_m))) svuint32_t svrshl_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_m))) svuint64_t svrshl_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_m))) 
svuint16_t svrshl_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_x))) svuint8_t svrshl_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_x))) svuint32_t svrshl_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_x))) svuint64_t svrshl_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_x))) svuint16_t svrshl_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u8_z))) svuint8_t svrshl_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u32_z))) svuint32_t svrshl_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u64_z))) svuint64_t svrshl_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshl_u16_z))) svuint16_t svrshl_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_m))) svint8_t svrshr_m(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_m))) svint32_t svrshr_m(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_m))) svint64_t svrshr_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_m))) svint16_t svrshr_m(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_m))) svuint8_t svrshr_m(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_m))) svuint32_t svrshr_m(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_m))) svuint64_t svrshr_m(svbool_t, svuint64_t, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_m))) svuint16_t svrshr_m(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_x))) svint8_t svrshr_x(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_x))) svint32_t svrshr_x(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_x))) svint64_t svrshr_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_x))) svint16_t svrshr_x(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_x))) svuint8_t svrshr_x(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_x))) svuint32_t svrshr_x(svbool_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_x))) svuint64_t svrshr_x(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_x))) svuint16_t svrshr_x(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s8_z))) svint8_t svrshr_z(svbool_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s32_z))) svint32_t svrshr_z(svbool_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s64_z))) svint64_t svrshr_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_s16_z))) svint16_t svrshr_z(svbool_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u8_z))) svuint8_t svrshr_z(svbool_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u32_z))) svuint32_t svrshr_z(svbool_t, svuint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u64_z))) svuint64_t svrshr_z(svbool_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshr_n_u16_z))) svuint16_t svrshr_z(svbool_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u32))) svuint16_t svrshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u64))) svuint32_t svrshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_u16))) svuint8_t svrshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s32))) svint16_t svrshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s64))) svint32_t svrshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnb_n_s16))) svint8_t svrshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u32))) svuint16_t svrshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u64))) svuint32_t svrshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_u16))) svuint8_t svrshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s32))) svint16_t svrshrnt(svint16_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s64))) svint32_t svrshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrshrnt_n_s16))) svint8_t svrshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_m))) svuint32_t svrsqrte_m(svuint32_t, svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_x))) svuint32_t 
svrsqrte_x(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsqrte_u32_z))) svuint32_t svrsqrte_z(svbool_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s8))) svint8_t svrsra(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s32))) svint32_t svrsra(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s64))) svint64_t svrsra(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_s16))) svint16_t svrsra(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u8))) svuint8_t svrsra(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u32))) svuint32_t svrsra(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u64))) svuint64_t svrsra(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsra_n_u16))) svuint16_t svrsra(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u32))) svuint16_t svrsubhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u64))) svuint32_t svrsubhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_u16))) svuint8_t svrsubhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s32))) svint16_t svrsubhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s64))) svint32_t svrsubhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_n_s16))) svint8_t svrsubhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u32))) svuint16_t 
svrsubhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u64))) svuint32_t svrsubhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_u16))) svuint8_t svrsubhnb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s32))) svint16_t svrsubhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s64))) svint32_t svrsubhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnb_s16))) svint8_t svrsubhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u32))) svuint16_t svrsubhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u64))) svuint32_t svrsubhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_u16))) svuint8_t svrsubhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s32))) svint16_t svrsubhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s64))) svint32_t svrsubhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_n_s16))) svint8_t svrsubhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u32))) svuint16_t svrsubhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u64))) svuint32_t svrsubhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_u16))) svuint8_t svrsubhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s32))) svint16_t svrsubhnt(svint16_t, svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s64))) svint32_t svrsubhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrsubhnt_s16))) svint8_t svrsubhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u32))) svuint32_t svsbclb(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_n_u64))) svuint64_t svsbclb(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u32))) svuint32_t svsbclb(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclb_u64))) svuint64_t svsbclb(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u32))) svuint32_t svsbclt(svuint32_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_n_u64))) svuint64_t svsbclt(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u32))) svuint32_t svsbclt(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsbclt_u64))) svuint64_t svsbclt(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s32))) svint32_t svshllb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s64))) svint64_t svshllb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_s16))) svint16_t svshllb(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u32))) svuint32_t svshllb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u64))) svuint64_t svshllb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllb_n_u16))) svuint16_t 
svshllb(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s32))) svint32_t svshllt(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s64))) svint64_t svshllt(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_s16))) svint16_t svshllt(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u32))) svuint32_t svshllt(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u64))) svuint64_t svshllt(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshllt_n_u16))) svuint16_t svshllt(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u32))) svuint16_t svshrnb(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u64))) svuint32_t svshrnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_u16))) svuint8_t svshrnb(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s32))) svint16_t svshrnb(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s64))) svint32_t svshrnb(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnb_n_s16))) svint8_t svshrnb(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u32))) svuint16_t svshrnt(svuint16_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u64))) svuint32_t svshrnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_u16))) svuint8_t svshrnt(svuint8_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s32))) svint16_t svshrnt(svint16_t, svint32_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s64))) svint32_t svshrnt(svint32_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svshrnt_n_s16))) svint8_t svshrnt(svint8_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u8))) svuint8_t svsli(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u32))) svuint32_t svsli(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u64))) svuint64_t svsli(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_u16))) svuint16_t svsli(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s8))) svint8_t svsli(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s32))) svint32_t svsli(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s64))) svint64_t svsli(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsli_n_s16))) svint16_t svsli(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_m))) svuint8_t svsqadd_m(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_m))) svuint32_t svsqadd_m(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_m))) svuint64_t svsqadd_m(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_m))) svuint16_t svsqadd_m(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_x))) svuint8_t svsqadd_x(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_x))) 
svuint32_t svsqadd_x(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_x))) svuint64_t svsqadd_x(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_x))) svuint16_t svsqadd_x(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u8_z))) svuint8_t svsqadd_z(svbool_t, svuint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u32_z))) svuint32_t svsqadd_z(svbool_t, svuint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u64_z))) svuint64_t svsqadd_z(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_n_u16_z))) svuint16_t svsqadd_z(svbool_t, svuint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_m))) svuint8_t svsqadd_m(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_m))) svuint32_t svsqadd_m(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_m))) svuint64_t svsqadd_m(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_m))) svuint16_t svsqadd_m(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_x))) svuint8_t svsqadd_x(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_x))) svuint32_t svsqadd_x(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_x))) svuint64_t svsqadd_x(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_x))) svuint16_t svsqadd_x(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u8_z))) svuint8_t 
svsqadd_z(svbool_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u32_z))) svuint32_t svsqadd_z(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u64_z))) svuint64_t svsqadd_z(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsqadd_u16_z))) svuint16_t svsqadd_z(svbool_t, svuint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s8))) svint8_t svsra(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s32))) svint32_t svsra(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s64))) svint64_t svsra(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_s16))) svint16_t svsra(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u8))) svuint8_t svsra(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u32))) svuint32_t svsra(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u64))) svuint64_t svsra(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsra_n_u16))) svuint16_t svsra(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u8))) svuint8_t svsri(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u32))) svuint32_t svsri(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u64))) svuint64_t svsri(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_u16))) svuint16_t svsri(svuint16_t, svuint16_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s8))) svint8_t svsri(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s32))) svint32_t svsri(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s64))) svint64_t svsri(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsri_n_s16))) svint16_t svsri(svint16_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32))) void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64))) void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64))) void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32))) void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32))) void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64))) void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32))) void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64))) void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64))) void svstnt1_scatter_offset(svbool_t, svuint64_t, 
int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32))) void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32))) void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64))) void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32))) void svstnt1_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64))) void svstnt1_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64))) void svstnt1_scatter(svbool_t, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32))) void svstnt1_scatter(svbool_t, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32))) void svstnt1_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64))) void svstnt1_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64))) void svstnt1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64))) void svstnt1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64))) void svstnt1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64))) void svstnt1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64))) void svstnt1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64))) void svstnt1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32))) void svstnt1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32))) void svstnt1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32))) void svstnt1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64))) void svstnt1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64))) void svstnt1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64))) void svstnt1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64))) void svstnt1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64))) void svstnt1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64))) void svstnt1_scatter_offset(svbool_t, int64_t *, 
svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32))) void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64))) void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32))) void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64))) void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32))) void svstnt1b_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64))) void svstnt1b_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32))) void svstnt1b_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64))) void svstnt1b_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32))) void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32))) void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64))) void svstnt1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64))) void svstnt1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64))) void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64))) void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64))) void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32))) void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64))) void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32))) void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64))) void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32))) void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64))) void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32))) void svstnt1h_scatter(svbool_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64))) void svstnt1h_scatter(svbool_t, svuint64_t, 
svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32))) void svstnt1h_scatter(svbool_t, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64))) void svstnt1h_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) void svstnt1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64))) void svstnt1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64))) void svstnt1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64))) void svstnt1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32))) void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32))) void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64))) void svstnt1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64))) void svstnt1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64))) void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64))) void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64))) void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64))) void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64))) void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64))) void svstnt1w_scatter(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64))) void svstnt1w_scatter(svbool_t, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64))) void svstnt1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64))) void svstnt1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64))) void svstnt1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64))) void svstnt1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64))) void svstnt1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64))) void svstnt1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64))) void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u32))) svuint16_t svsubhnb(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u64))) svuint32_t svsubhnb(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_u16))) svuint8_t svsubhnb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s32))) svint16_t svsubhnb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s64))) svint32_t svsubhnb(svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_n_s16))) svint8_t svsubhnb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u32))) svuint16_t svsubhnb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u64))) svuint32_t svsubhnb(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_u16))) svuint8_t svsubhnb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s32))) svint16_t svsubhnb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s64))) svint32_t svsubhnb(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnb_s16))) svint8_t svsubhnb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u32))) svuint16_t svsubhnt(svuint16_t, svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u64))) svuint32_t 
svsubhnt(svuint32_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_u16))) svuint8_t svsubhnt(svuint8_t, svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s32))) svint16_t svsubhnt(svint16_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s64))) svint32_t svsubhnt(svint32_t, svint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_n_s16))) svint8_t svsubhnt(svint8_t, svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u32))) svuint16_t svsubhnt(svuint16_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u64))) svuint32_t svsubhnt(svuint32_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_u16))) svuint8_t svsubhnt(svuint8_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s32))) svint16_t svsubhnt(svint16_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s64))) svint32_t svsubhnt(svint32_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubhnt_s16))) svint8_t svsubhnt(svint8_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s32))) svint32_t svsublb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s64))) svint64_t svsublb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_s16))) svint16_t svsublb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u32))) svuint32_t svsublb(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u64))) svuint64_t svsublb(svuint32_t, uint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_n_u16))) svuint16_t svsublb(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s32))) svint32_t svsublb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s64))) svint64_t svsublb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_s16))) svint16_t svsublb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u32))) svuint32_t svsublb(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u64))) svuint64_t svsublb(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublb_u16))) svuint16_t svsublb(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s32))) svint32_t svsublbt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s64))) svint64_t svsublbt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_n_s16))) svint16_t svsublbt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s32))) svint32_t svsublbt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s64))) svint64_t svsublbt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublbt_s16))) svint16_t svsublbt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s32))) svint32_t svsublt(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s64))) svint64_t svsublt(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_s16))) svint16_t svsublt(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u32))) svuint32_t svsublt(svuint16_t, uint16_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u64))) svuint64_t svsublt(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_n_u16))) svuint16_t svsublt(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s32))) svint32_t svsublt(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s64))) svint64_t svsublt(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_s16))) svint16_t svsublt(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u32))) svuint32_t svsublt(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u64))) svuint64_t svsublt(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsublt_u16))) svuint16_t svsublt(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s32))) svint32_t svsubltb(svint16_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s64))) svint64_t svsubltb(svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_n_s16))) svint16_t svsubltb(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s32))) svint32_t svsubltb(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s64))) svint64_t svsubltb(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubltb_s16))) svint16_t svsubltb(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s32))) svint32_t svsubwb(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s64))) svint64_t svsubwb(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_s16))) svint16_t svsubwb(svint16_t, 
int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u32))) svuint32_t svsubwb(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u64))) svuint64_t svsubwb(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_n_u16))) svuint16_t svsubwb(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s32))) svint32_t svsubwb(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s64))) svint64_t svsubwb(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_s16))) svint16_t svsubwb(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u32))) svuint32_t svsubwb(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u64))) svuint64_t svsubwb(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwb_u16))) svuint16_t svsubwb(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s32))) svint32_t svsubwt(svint32_t, int16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s64))) svint64_t svsubwt(svint64_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_s16))) svint16_t svsubwt(svint16_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u32))) svuint32_t svsubwt(svuint32_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u64))) svuint64_t svsubwt(svuint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_n_u16))) svuint16_t svsubwt(svuint16_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s32))) svint32_t svsubwt(svint32_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s64))) svint64_t 
svsubwt(svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_s16))) svint16_t svsubwt(svint16_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u32))) svuint32_t svsubwt(svuint32_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u64))) svuint64_t svsubwt(svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsubwt_u16))) svuint16_t svsubwt(svuint16_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u8))) svuint8_t svtbl2(svuint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u32))) svuint32_t svtbl2(svuint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u64))) svuint64_t svtbl2(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u16))) svuint16_t svtbl2(svuint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s8))) svint8_t svtbl2(svint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f64))) svfloat64_t svtbl2(svfloat64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f32))) svfloat32_t svtbl2(svfloat32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f16))) svfloat16_t svtbl2(svfloat16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s32))) svint32_t svtbl2(svint32x2_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s64))) svint64_t svtbl2(svint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s16))) svint16_t svtbl2(svint16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u8))) svuint8_t svtbx(svuint8_t, svuint8_t, svuint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u32))) svuint32_t svtbx(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u64))) svuint64_t svtbx(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u16))) svuint16_t svtbx(svuint16_t, svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s8))) svint8_t svtbx(svint8_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f64))) svfloat64_t svtbx(svfloat64_t, svfloat64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f32))) svfloat32_t svtbx(svfloat32_t, svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f16))) svfloat16_t svtbx(svfloat16_t, svfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s32))) svint32_t svtbx(svint32_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s64))) svint64_t svtbx(svint64_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s16))) svint16_t svtbx(svint16_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_m))) svint8_t svuqadd_m(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_m))) svint32_t svuqadd_m(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_m))) svint64_t svuqadd_m(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_m))) svint16_t svuqadd_m(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_x))) svint8_t svuqadd_x(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_x))) 
svint32_t svuqadd_x(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_x))) svint64_t svuqadd_x(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_x))) svint16_t svuqadd_x(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s8_z))) svint8_t svuqadd_z(svbool_t, svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s32_z))) svint32_t svuqadd_z(svbool_t, svint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s64_z))) svint64_t svuqadd_z(svbool_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_n_s16_z))) svint16_t svuqadd_z(svbool_t, svint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_m))) svint8_t svuqadd_m(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_m))) svint32_t svuqadd_m(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_m))) svint64_t svuqadd_m(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_m))) svint16_t svuqadd_m(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_x))) svint8_t svuqadd_x(svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_x))) svint32_t svuqadd_x(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_x))) svint64_t svuqadd_x(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_x))) svint16_t svuqadd_x(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s8_z))) svint8_t svuqadd_z(svbool_t, 
svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s32_z))) svint32_t svuqadd_z(svbool_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s64_z))) svint64_t svuqadd_z(svbool_t, svint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuqadd_s16_z))) svint16_t svuqadd_z(svbool_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s32))) svbool_t svwhilege_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s32))) svbool_t svwhilege_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s32))) svbool_t svwhilege_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s32))) svbool_t svwhilege_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64))) svbool_t svwhilege_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64))) svbool_t svwhilege_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64))) svbool_t svwhilege_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64))) svbool_t svwhilege_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u32))) svbool_t svwhilege_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u32))) svbool_t svwhilege_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u32))) svbool_t svwhilege_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u32))) svbool_t svwhilege_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64))) svbool_t 
svwhilege_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64))) svbool_t svwhilege_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64))) svbool_t svwhilege_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64))) svbool_t svwhilege_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s32))) svbool_t svwhilegt_b8(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s32))) svbool_t svwhilegt_b32(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s32))) svbool_t svwhilegt_b64(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s32))) svbool_t svwhilegt_b16(int32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64))) svbool_t svwhilegt_b8(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64))) svbool_t svwhilegt_b32(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64))) svbool_t svwhilegt_b64(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64))) svbool_t svwhilegt_b16(int64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u32))) svbool_t svwhilegt_b8(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u32))) svbool_t svwhilegt_b32(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u32))) svbool_t svwhilegt_b64(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u32))) svbool_t svwhilegt_b16(uint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64))) svbool_t 
svwhilegt_b8(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64))) svbool_t svwhilegt_b32(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64))) svbool_t svwhilegt_b64(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64))) svbool_t svwhilegt_b16(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u8))) svbool_t svwhilerw(uint8_t const *, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s8))) svbool_t svwhilerw(int8_t const *, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u64))) svbool_t svwhilerw(uint64_t const *, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f64))) svbool_t svwhilerw(float64_t const *, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s64))) svbool_t svwhilerw(int64_t const *, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u16))) svbool_t svwhilerw(uint16_t const *, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f16))) svbool_t svwhilerw(float16_t const *, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s16))) svbool_t svwhilerw(int16_t const *, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u32))) svbool_t svwhilerw(uint32_t const *, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f32))) svbool_t svwhilerw(float32_t const *, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s32))) svbool_t svwhilerw(int32_t const *, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u8))) svbool_t svwhilewr(uint8_t const *, uint8_t 
const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s8))) svbool_t svwhilewr(int8_t const *, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u64))) svbool_t svwhilewr(uint64_t const *, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f64))) svbool_t svwhilewr(float64_t const *, float64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s64))) svbool_t svwhilewr(int64_t const *, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u16))) svbool_t svwhilewr(uint16_t const *, uint16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f16))) svbool_t svwhilewr(float16_t const *, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s16))) svbool_t svwhilewr(int16_t const *, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u32))) svbool_t svwhilewr(uint32_t const *, uint32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f32))) svbool_t svwhilewr(float32_t const *, float32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s32))) svbool_t svwhilewr(int32_t const *, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u8))) svuint8_t svxar(svuint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u32))) svuint32_t svxar(svuint32_t, svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u64))) svuint64_t svxar(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_u16))) svuint16_t svxar(svuint16_t, svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s8))) svint8_t svxar(svint8_t, svint8_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s32))) svint32_t svxar(svint32_t, svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s64))) svint64_t svxar(svint64_t, svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s16))) svint16_t svxar(svint16_t, svint16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) svbfloat16_t svtbl2_bf16(svbfloat16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) svbfloat16_t svtbx_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) svbool_t svwhilerw_bf16(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) svbool_t svwhilewr_bf16(bfloat16_t const *, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) svbfloat16_t svtbl2(svbfloat16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) svbfloat16_t svtbx(svbfloat16_t, svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) svbool_t svwhilerw(bfloat16_t const *, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) svbool_t svwhilewr(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) svuint8_t svaesd_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) svuint8_t svaese_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) svuint8_t svaesimc_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) svuint8_t svaesmc_u8(svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) 
svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) svuint8_t svaesd(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) svuint8_t svaese(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) svuint8_t svaesimc(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) svuint8_t svaesmc(svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) svuint64_t svpmullb_pair(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) svuint64_t svpmullb_pair(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) svuint64_t svpmullt_pair(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) svuint64_t svpmullt_pair(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) svuint8_t svbdep_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) svuint32_t svbdep_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) svuint64_t svbdep_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) svuint16_t svbdep_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) svuint8_t 
svbdep_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) svuint32_t svbdep_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) svuint64_t svbdep_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) svuint16_t svbdep_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) svuint8_t svbext_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) svuint32_t svbext_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) svuint64_t svbext_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) svuint16_t svbext_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) svuint8_t svbext_u8(svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) svuint32_t svbext_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) svuint64_t svbext_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) svuint16_t svbext_u16(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) svuint8_t svbgrp_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) svuint32_t svbgrp_n_u32(svuint32_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) svuint64_t svbgrp_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) svuint16_t svbgrp_n_u16(svuint16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) svuint8_t svbgrp_u8(svuint8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) svuint32_t svbgrp_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) svuint64_t svbgrp_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) svuint16_t svbgrp_u16(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) svuint8_t svbdep(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) svuint32_t svbdep(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64))) svuint64_t svbdep(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16))) svuint16_t svbdep(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8))) svuint8_t svbdep(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32))) svuint32_t svbdep(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64))) svuint64_t svbdep(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16))) svuint16_t svbdep(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8))) svuint8_t svbext(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32))) svuint32_t svbext(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64))) svuint64_t svbext(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16))) svuint16_t svbext(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8))) svuint8_t svbext(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32))) svuint32_t svbext(svuint32_t, svuint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64))) svuint64_t svbext(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16))) svuint16_t svbext(svuint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8))) svuint8_t svbgrp(svuint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32))) svuint32_t svbgrp(svuint32_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64))) svuint64_t svbgrp(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16))) svuint16_t svbgrp(svuint16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8))) svuint8_t svbgrp(svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32))) svuint32_t svbgrp(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) svuint64_t svbgrp(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) svuint16_t svbgrp(svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) svuint64_t svrax1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) svint64_t svrax1_s64(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) svuint64_t svrax1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) svint64_t svrax1(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) svuint32_t svsm4ekey_u32(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e(svuint32_t, 
svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) svuint32_t svsm4ekey(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) svfloat64_t svclamp_f64(svfloat64_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) svfloat32_t svclamp_f32(svfloat32_t, svfloat32_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) svfloat16_t svclamp_f16(svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) svint8_t svclamp_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) svint32_t svclamp_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) svint64_t svclamp_s64(svint64_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) svint16_t svclamp_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) svuint8_t svclamp_u8(svuint8_t, svuint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) svuint32_t svclamp_u32(svuint32_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) svuint64_t svclamp_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) svuint16_t svclamp_u16(svuint16_t, svuint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8))) svcount_t svptrue_c8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32))) svcount_t svptrue_c32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64))) svcount_t svptrue_c64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16))) 
svcount_t svptrue_c16(void); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) svfloat64_t svclamp(svfloat64_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) svfloat32_t svclamp(svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) svfloat16_t svclamp(svfloat16_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) svint8_t svclamp(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) svint32_t svclamp(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) svint64_t svclamp(svint64_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) svint16_t svclamp(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) svuint8_t svclamp(svuint8_t, svuint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) svuint32_t svclamp(svuint32_t, svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) svuint64_t svclamp(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) svuint16_t svclamp(svuint16_t, svuint16_t, svuint16_t); #define svcvtnt_bf16_x svcvtnt_bf16_m #define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m #define svcvtnt_f16_x svcvtnt_f16_m #define svcvtnt_f16_f32_x svcvtnt_f16_f32_m #define svcvtnt_f32_x svcvtnt_f32_m #define svcvtnt_f32_f64_x svcvtnt_f32_f64_m #define svcvtxnt_f32_x svcvtxnt_f32_m #define svcvtxnt_f32_f64_x svcvtxnt_f32_f64_m #ifdef __cplusplus } // extern "C" #endif #undef __ai #undef __aio #endif /* __ARM_SVE_H */ /*===---- armintr.h - ARM Windows intrinsics -------------------------------=== * * Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we're compiling for the windows platform. */ #ifndef _MSC_VER #include_next #else #ifndef __ARMINTR_H #define __ARMINTR_H typedef enum { _ARM_BARRIER_SY = 0xF, _ARM_BARRIER_ST = 0xE, _ARM_BARRIER_ISH = 0xB, _ARM_BARRIER_ISHST = 0xA, _ARM_BARRIER_NSH = 0x7, _ARM_BARRIER_NSHST = 0x6, _ARM_BARRIER_OSH = 0x3, _ARM_BARRIER_OSHST = 0x2 } _ARMINTR_BARRIER_TYPE; #endif /* __ARMINTR_H */ #endif /* _MSC_VER */ /*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVX2INTRIN_H #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128))) /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Computes sixteen sum of absolute difference (SAD) operations on sets of /// four unsigned 8-bit integers from the 256-bit integer vectors \a X and /// \a Y. /// /// Eight SAD results are computed using the lower half of the input /// vectors, and another eight using the upper half. These 16-bit values /// are returned in the lower and upper halves of the 256-bit result, /// respectively. 
/// /// A single SAD operation selects four bytes from \a X and four bytes from /// \a Y as input. It computes the differences between each \a X byte and /// the corresponding \a Y byte, takes the absolute value of each /// difference, and sums these four values to form one 16-bit result. The /// intrinsic computes 16 of these results with different sets of input /// bytes. /// /// For each set of eight results, the SAD operations use the same four /// bytes from \a Y; the starting bit position for these four bytes is /// specified by \a M[1:0] times 32. The eight operations use successive /// sets of four bytes from \a X; the starting bit position for the first /// set of four bytes is specified by \a M[2] times 32. These bit positions /// are all relative to the 128-bit lane for each set of eight operations. /// /// \code{.operation} /// r := 0 /// FOR i := 0 TO 1 /// j := i*3 /// Ybase := M[j+1:j]*32 + i*128 /// Xbase := M[j+2]*32 + i*128 /// FOR k := 0 TO 3 /// temp0 := ABS(X[Xbase+7:Xbase] - Y[Ybase+7:Ybase]) /// temp1 := ABS(X[Xbase+15:Xbase+8] - Y[Ybase+15:Ybase+8]) /// temp2 := ABS(X[Xbase+23:Xbase+16] - Y[Ybase+23:Ybase+16]) /// temp3 := ABS(X[Xbase+31:Xbase+24] - Y[Ybase+31:Ybase+24]) /// result[r+15:r] := temp0 + temp1 + temp2 + temp3 /// Xbase := Xbase + 8 /// r := r + 16 /// ENDFOR /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_mpsadbw_epu8(__m256i X, __m256i Y, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VMPSADBW instruction. /// /// \param X /// A 256-bit integer vector containing one of the inputs. /// \param Y /// A 256-bit integer vector containing one of the inputs. /// \param M /// An unsigned immediate value specifying the starting positions of the /// bytes to operate on. /// \returns A 256-bit vector of [16 x i16] containing the result. 
#define _mm256_mpsadbw_epu8(X, Y, M) \ ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \ (__v32qi)(__m256i)(Y), (int)(M))) /// Computes the absolute value of each signed byte in the 256-bit integer /// vector \a __a and returns each value in the corresponding byte of /// the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSB instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { return (__m256i)__builtin_elementwise_abs((__v32qs)__a); } /// Computes the absolute value of each signed 16-bit element in the 256-bit /// vector of [16 x i16] in \a __a and returns each value in the /// corresponding element of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { return (__m256i)__builtin_elementwise_abs((__v16hi)__a); } /// Computes the absolute value of each signed 32-bit element in the 256-bit /// vector of [8 x i32] in \a __a and returns each value in the /// corresponding element of the result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { return (__m256i)__builtin_elementwise_abs((__v8si)__a); } /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit /// integers using signed saturation, and returns the 256-bit result. 
/// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*8 /// result[7+k:k] := SATURATE8(__a[15+j:j]) /// result[71+k:64+k] := SATURATE8(__b[15+j:j]) /// result[135+k:128+k] := SATURATE8(__a[143+j:128+j]) /// result[199+k:192+k] := SATURATE8(__b[143+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKSSWB instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used to generate result[63:0] and /// result[191:128]. /// \param __b /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } /// Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit /// integers using signed saturation, and returns the resulting 256-bit /// vector of [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// k := i*16 /// result[15+k:k] := SATURATE16(__a[31+j:j]) /// result[79+k:64+k] := SATURATE16(__b[31+j:j]) /// result[143+k:128+k] := SATURATE16(__a[159+j:128+j]) /// result[207+k:192+k] := SATURATE16(__b[159+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKSSDW instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] used to generate result[63:0] and /// result[191:128]. /// \param __b /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } /// Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers /// using unsigned saturation, and returns the 256-bit result. 
/// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*8 /// result[7+k:k] := SATURATE8U(__a[15+j:j]) /// result[71+k:64+k] := SATURATE8U(__b[15+j:j]) /// result[135+k:128+k] := SATURATE8U(__a[143+j:128+j]) /// result[199+k:192+k] := SATURATE8U(__b[143+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKUSWB instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used to generate result[63:0] and /// result[191:128]. /// \param __b /// A 256-bit vector of [16 x i16] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } /// Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers /// using unsigned saturation, and returns the resulting 256-bit vector of /// [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// k := i*16 /// result[15+k:k] := SATURATE16U(__V1[31+j:j]) /// result[79+k:64+k] := SATURATE16U(__V2[31+j:j]) /// result[143+k:128+k] := SATURATE16U(__V1[159+j:128+j]) /// result[207+k:192+k] := SATURATE16U(__V2[159+j:128+j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPACKUSDW instruction. /// /// \param __V1 /// A 256-bit vector of [8 x i32] used to generate result[63:0] and /// result[191:128]. /// \param __V2 /// A 256-bit vector of [8 x i32] used to generate result[127:64] and /// result[255:192]. /// \returns A 256-bit vector of [16 x i16] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } /// Adds 8-bit integers from corresponding bytes of two 256-bit integer /// vectors and returns the lower 8 bits of each sum in the corresponding /// byte of the 256-bit integer vector result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the source operands. /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of /// [16 x i16] and returns the lower 16 bits of each sum in the /// corresponding element of the [16 x i16] result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } /// Adds 32-bit integers from corresponding elements of two 256-bit vectors of /// [8 x i32] and returns the lower 32 bits of each sum in the corresponding /// element of the [8 x i32] result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. 
/// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } /// Adds 64-bit integers from corresponding elements of two 256-bit vectors of /// [4 x i64] and returns the lower 64 bits of each sum in the corresponding /// element of the [4 x i64] result (overflow is ignored). /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } /// Adds 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using signed saturation, and returns each sum in the /// corresponding byte of the 256-bit integer vector result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDSB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the source operands. /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b); } /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of /// [16 x i16] using signed saturation, and returns the [16 x i16] result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. 
/// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b); } /// Adds 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using unsigned saturation, and returns each sum in the /// corresponding byte of the 256-bit integer vector result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDUSB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the source operands. /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b); } /// Adds 16-bit integers from corresponding elements of two 256-bit vectors of /// [16 x i16] using unsigned saturation, and returns the [16 x i16] result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPADDUSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b); } /// Uses the lower half of the 256-bit vector \a a as the upper half of a /// temporary 256-bit value, and the lower half of the 256-bit vector \a b /// as the lower half of the temporary value. 
Right-shifts the temporary /// value by \a n bytes, and uses the lower 16 bytes of the shifted value /// as the lower 16 bytes of the result. Uses the upper halves of \a a and /// \a b to make another temporary value, right shifts by \a n, and uses /// the lower 16 bytes of the shifted value as the upper 16 bytes of the /// result. /// /// \headerfile /// /// \code /// __m256i _mm256_alignr_epi8(__m256i a, __m256i b, const int n); /// \endcode /// /// This intrinsic corresponds to the \c VPALIGNR instruction. /// /// \param a /// A 256-bit integer vector containing source values. /// \param b /// A 256-bit integer vector containing source values. /// \param n /// An immediate value specifying the number of bytes to shift. /// \returns A 256-bit integer vector containing the result. #define _mm256_alignr_epi8(a, b, n) \ ((__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (n))) /// Computes the bitwise AND of the 256-bit integer vectors in \a __a and /// \a __b. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPAND instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); } /// Computes the bitwise AND of the 256-bit integer vector in \a __b with /// the bitwise NOT of the 256-bit integer vector in \a __a. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPANDN instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_si256(__m256i __a, __m256i __b) { return (__m256i)(~(__v4du)__a & (__v4du)__b); } /// Computes the averages of the corresponding unsigned bytes in the two /// 256-bit integer vectors in \a __a and \a __b and returns each /// average in the corresponding byte of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := (__a[j+7:j] + __b[j+7:j] + 1) >> 1 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPAVGB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); } /// Computes the averages of the corresponding unsigned 16-bit integers in /// the two 256-bit vectors of [16 x i16] in \a __a and \a __b and returns /// each average in the corresponding element of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := (__a[j+15:j] + __b[j+15:j] + 1) >> 1 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPAVGW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); } /// Merges 8-bit integer values from either of the two 256-bit vectors /// \a __V1 or \a __V2, as specified by the 256-bit mask \a __M and returns /// the resulting 256-bit integer vector. 
/// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// IF __M[7+i] == 0 /// result[7+j:j] := __V1[7+j:j] /// ELSE /// result[7+j:j] := __V2[7+j:j] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBLENDVB instruction. /// /// \param __V1 /// A 256-bit integer vector containing source values. /// \param __V2 /// A 256-bit integer vector containing source values. /// \param __M /// A 256-bit integer vector, with bit [7] of each byte specifying the /// source for each corresponding byte of the result. When the mask bit /// is 0, the byte is copied from \a __V1; otherwise, it is copied from /// \a __V2. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) { return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, (__v32qi)__M); } /// Merges 16-bit integer values from either of the two 256-bit vectors /// \a V1 or \a V2, as specified by the immediate integer operand \a M, /// and returns the resulting 256-bit vector of [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// IF M[i] == 0 /// result[7+j:j] := V1[7+j:j] /// result[135+j:128+j] := V1[135+j:128+j] /// ELSE /// result[7+j:j] := V2[7+j:j] /// result[135+j:128+j] := V2[135+j:128+j] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_blend_epi16(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPBLENDW instruction. /// /// \param V1 /// A 256-bit vector of [16 x i16] containing source values. /// \param V2 /// A 256-bit vector of [16 x i16] containing source values. /// \param M /// An immediate 8-bit integer operand, with bits [7:0] specifying the /// source for each element of the result. The position of the mask bit /// corresponds to the index of a copied value. 
When a mask bit is 0, the /// element is copied from \a V1; otherwise, it is copied from \a V2. /// \a M[0] determines the source for elements 0 and 8, \a M[1] for /// elements 1 and 9, and so forth. /// \returns A 256-bit vector of [16 x i16] containing the result. #define _mm256_blend_epi16(V1, V2, M) \ ((__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \ (__v16hi)(__m256i)(V2), (int)(M))) /// Compares corresponding bytes in the 256-bit integer vectors in \a __a and /// \a __b for equality and returns the outcomes in the corresponding /// bytes of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := (__a[j+7:j] == __b[j+7:j]) ? 0xFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the inputs. /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); } /// Compares corresponding elements in the 256-bit vectors of [16 x i16] in /// \a __a and \a __b for equality and returns the outcomes in the /// corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := (__a[j+15:j] == __b[j+15:j]) ? 0xFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); } /// Compares corresponding elements in the 256-bit vectors of [8 x i32] in /// \a __a and \a __b for equality and returns the outcomes in the /// corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// result[j+31:j] := (__a[j+31:j] == __b[j+31:j]) ? 0xFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); } /// Compares corresponding elements in the 256-bit vectors of [4 x i64] in /// \a __a and \a __b for equality and returns the outcomes in the /// corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// result[j+63:j] := (__a[j+63:j] == __b[j+63:j]) ? 0xFFFFFFFFFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPEQQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a == (__v4di)__b); } /// Compares corresponding signed bytes in the 256-bit integer vectors in /// \a __a and \a __b for greater-than and returns the outcomes in the /// corresponding bytes of the 256-bit result. 
/// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := (__a[j+7:j] > __b[j+7:j]) ? 0xFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTB instruction. /// /// \param __a /// A 256-bit integer vector containing one of the inputs. /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { /* This function always performs a signed comparison, but __v32qi is a char which may be signed or unsigned, so use __v32qs. */ return (__m256i)((__v32qs)__a > (__v32qs)__b); } /// Compares corresponding signed elements in the 256-bit vectors of /// [16 x i16] in \a __a and \a __b for greater-than and returns the /// outcomes in the corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := (__a[j+15:j] > __b[j+15:j]) ? 0xFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); } /// Compares corresponding signed elements in the 256-bit vectors of /// [8 x i32] in \a __a and \a __b for greater-than and returns the /// outcomes in the corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// result[j+31:j] := (__a[j+31:j] > __b[j+31:j]) ? 0xFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTD instruction. 
/// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); } /// Compares corresponding signed elements in the 256-bit vectors of /// [4 x i64] in \a __a and \a __b for greater-than and returns the /// outcomes in the corresponding elements of the 256-bit result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// result[j+63:j] := (__a[j+63:j] > __b[j+63:j]) ? 0xFFFFFFFFFFFFFFFF : 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPCMPGTQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a > (__v4di)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] and returns the lower 16 bits of each sum in an /// element of the [16 x i16] result (overflow is ignored). Sums from /// \a __a are returned in the lower 64 bits of each 128-bit half of the /// result; sums from \a __b are returned in the upper 64 bits of each /// 128-bit half of the result. 
/// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := __a[j+15:j] + __a[j+31:j+16] /// result[j+31:j+16] := __a[j+47:j+32] + __a[j+63:j+48] /// result[j+47:j+32] := __a[j+79:j+64] + __a[j+95:j+80] /// result[j+63:j+48] := __a[j+111:j+96] + __a[j+127:j+112] /// result[j+79:j+64] := __b[j+15:j] + __b[j+31:j+16] /// result[j+95:j+80] := __b[j+47:j+32] + __b[j+63:j+48] /// result[j+111:j+96] := __b[j+79:j+64] + __b[j+95:j+80] /// result[j+127:j+112] := __b[j+111:j+96] + __b[j+127:j+112] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit /// vectors of [8 x i32] and returns the lower 32 bits of each sum in an /// element of the [8 x i32] result (overflow is ignored). Sums from \a __a /// are returned in the lower 64 bits of each 128-bit half of the result; /// sums from \a __b are returned in the upper 64 bits of each 128-bit half /// of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+31:j] := __a[j+31:j] + __a[j+63:j+32] /// result[j+63:j+32] := __a[j+95:j+64] + __a[j+127:j+96] /// result[j+95:j+64] := __b[j+31:j] + __b[j+63:j+32] /// result[j+127:j+96] := __b[j+95:j+64] + __b[j+127:j+96] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. 
/// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] using signed saturation and returns each sum in /// an element of the [16 x i16] result. Sums from \a __a are returned in /// the lower 64 bits of each 128-bit half of the result; sums from \a __b /// are returned in the upper 64 bits of each 128-bit half of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := SATURATE16(__a[j+15:j] + __a[j+31:j+16]) /// result[j+31:j+16] := SATURATE16(__a[j+47:j+32] + __a[j+63:j+48]) /// result[j+47:j+32] := SATURATE16(__a[j+79:j+64] + __a[j+95:j+80]) /// result[j+63:j+48] := SATURATE16(__a[j+111:j+96] + __a[j+127:j+112]) /// result[j+79:j+64] := SATURATE16(__b[j+15:j] + __b[j+31:j+16]) /// result[j+95:j+80] := SATURATE16(__b[j+47:j+32] + __b[j+63:j+48]) /// result[j+111:j+96] := SATURATE16(__b[j+79:j+64] + __b[j+95:j+80]) /// result[j+127:j+112] := SATURATE16(__b[j+111:j+96] + __b[j+127:j+112]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] and returns the lower 16 bits of each difference /// in an element of the [16 x i16] result (overflow is ignored). /// Differences from \a __a are returned in the lower 64 bits of each /// 128-bit half of the result; differences from \a __b are returned in the /// upper 64 bits of each 128-bit half of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := __a[j+15:j] - __a[j+31:j+16] /// result[j+31:j+16] := __a[j+47:j+32] - __a[j+63:j+48] /// result[j+47:j+32] := __a[j+79:j+64] - __a[j+95:j+80] /// result[j+63:j+48] := __a[j+111:j+96] - __a[j+127:j+112] /// result[j+79:j+64] := __b[j+15:j] - __b[j+31:j+16] /// result[j+95:j+80] := __b[j+47:j+32] - __b[j+63:j+48] /// result[j+111:j+96] := __b[j+79:j+64] - __b[j+95:j+80] /// result[j+127:j+112] := __b[j+111:j+96] - __b[j+127:j+112] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit /// vectors of [8 x i32] and returns the lower 32 bits of each difference in /// an element of the [8 x i32] result (overflow is ignored). 
Differences /// from \a __a are returned in the lower 64 bits of each 128-bit half of /// the result; differences from \a __b are returned in the upper 64 bits /// of each 128-bit half of the result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+31:j] := __a[j+31:j] - __a[j+63:j+32] /// result[j+63:j+32] := __a[j+95:j+64] - __a[j+127:j+96] /// result[j+95:j+64] := __b[j+31:j] - __b[j+63:j+32] /// result[j+127:j+96] := __b[j+95:j+64] - __b[j+127:j+96] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit /// vectors of [16 x i16] using signed saturation and returns each sum in /// an element of the [16 x i16] result. Differences from \a __a are /// returned in the lower 64 bits of each 128-bit half of the result; /// differences from \a __b are returned in the upper 64 bits of each /// 128-bit half of the result. 
/// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// result[j+15:j] := SATURATE16(__a[j+15:j] - __a[j+31:j+16]) /// result[j+31:j+16] := SATURATE16(__a[j+47:j+32] - __a[j+63:j+48]) /// result[j+47:j+32] := SATURATE16(__a[j+79:j+64] - __a[j+95:j+80]) /// result[j+63:j+48] := SATURATE16(__a[j+111:j+96] - __a[j+127:j+112]) /// result[j+79:j+64] := SATURATE16(__b[j+15:j] - __b[j+31:j+16]) /// result[j+95:j+80] := SATURATE16(__b[j+47:j+32] - __b[j+63:j+48]) /// result[j+111:j+96] := SATURATE16(__b[j+79:j+64] - __b[j+95:j+80]) /// result[j+127:j+112] := SATURATE16(__b[j+111:j+96] - __b[j+127:j+112]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a /// with the corresponding signed byte from the 256-bit integer vector in /// \a __b, forming signed 16-bit intermediate products. Adds adjacent /// pairs of those products using signed saturation to form 16-bit sums /// returned as elements of the [16 x i16] result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// temp1 := __a[j+7:j] * __b[j+7:j] /// temp2 := __a[j+15:j+8] * __b[j+15:j+8] /// result[j+15:j] := SATURATE16(temp1 + temp2) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMADDUBSW instruction. /// /// \param __a /// A 256-bit vector containing one of the source operands. /// \param __b /// A 256-bit vector containing one of the source operands. 
/// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maddubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); } /// Multiplies corresponding 16-bit elements of two 256-bit vectors of /// [16 x i16], forming 32-bit intermediate products, and adds pairs of /// those products to form 32-bit sums returned as elements of the /// [8 x i32] result. /// /// There is only one wraparound case: when all four of the 16-bit sources /// are \c 0x8000, the result will be \c 0x80000000. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// temp1 := __a[j+15:j] * __b[j+15:j] /// temp2 := __a[j+31:j+16] * __b[j+31:j+16] /// result[j+31:j] := temp1 + temp2 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMADDWD instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); } /// Compares the corresponding signed bytes in the two 256-bit integer vectors /// in \a __a and \a __b and returns the larger of each pair in the /// corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXSB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b); } /// Compares the corresponding signed 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b); } /// Compares the corresponding signed 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXSD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b); } /// Compares the corresponding unsigned bytes in the two 256-bit integer /// vectors in \a __a and \a __b and returns the larger of each pair in /// the corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXUB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b); } /// Compares the corresponding unsigned 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXUW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b); } /// Compares the corresponding unsigned 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the larger of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMAXUD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b); } /// Compares the corresponding signed bytes in the two 256-bit integer vectors /// in \a __a and \a __b and returns the smaller of each pair in the /// corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINSB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b); } /// Compares the corresponding signed 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b); } /// Compares the corresponding signed 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINSD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b); } /// Compares the corresponding unsigned bytes in the two 256-bit integer /// vectors in \a __a and \a __b and returns the smaller of each pair in /// the corresponding byte of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINUB instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b); } /// Compares the corresponding unsigned 16-bit integers in the two 256-bit /// vectors of [16 x i16] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINUW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b); } /// Compares the corresponding unsigned 32-bit integers in the two 256-bit /// vectors of [8 x i32] in \a __a and \a __b and returns the smaller of /// each pair in the corresponding element of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMINUD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); } static __inline__ int __DEFAULT_FN_ATTRS256 _mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } /// Sign-extends bytes from the 128-bit integer vector in \a __V and returns /// the 16-bit values in the corresponding elements of a 256-bit vector /// of [16 x i16]. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*8 /// k := i*16 /// result[k+15:k] := SignExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXBW instruction. 
/// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the sign-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); } /// Sign-extends bytes from the lower half of the 128-bit integer vector in /// \a __V and returns the 32-bit values in the corresponding elements of a /// 256-bit vector of [8 x i32]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*8 /// k := i*32 /// result[k+31:k] := SignExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXBD instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } /// Sign-extends the first four bytes from the 128-bit integer vector in /// \a __V and returns the 64-bit values in the corresponding elements of a /// 256-bit vector of [4 x i64]. /// /// \code{.operation} /// result[63:0] := SignExtend(__V[7:0]) /// result[127:64] := SignExtend(__V[15:8]) /// result[191:128] := SignExtend(__V[23:16]) /// result[255:192] := SignExtend(__V[31:24]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXBQ instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. 
/// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); } /// Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in /// \a __V and returns the 32-bit values in the corresponding elements of a /// 256-bit vector of [8 x i32]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*32 /// result[k+31:k] := SignExtend(__V[j+15:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } /// Sign-extends 16-bit elements from the lower half of the 128-bit vector of /// [8 x i16] in \a __V and returns the 64-bit values in the corresponding /// elements of a 256-bit vector of [4 x i64]. /// /// \code{.operation} /// result[63:0] := SignExtend(__V[15:0]) /// result[127:64] := SignExtend(__V[31:16]) /// result[191:128] := SignExtend(__V[47:32]) /// result[255:192] := SignExtend(__V[64:48]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXWQ instruction. /// /// \param __V /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepi16_epi64(__m128i __V) {
  /* The shuffle keeps only source elements 0..3; those are then widened. */
  return (__m256i)__builtin_convertvector(
      __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3),
      __v4di);
}

/// Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in
///    \a __V and returns the 64-bit values in the corresponding elements of a
///    256-bit vector of [4 x i64].
///
/// \code{.operation}
/// result[63:0] := SignExtend(__V[31:0])
/// result[127:64] := SignExtend(__V[63:32])
/// result[191:128] := SignExtend(__V[95:64])
/// result[255:192] := SignExtend(__V[127:96])
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMOVSXDQ instruction.
///
/// \param __V
///    A 128-bit vector of [4 x i32] containing the source values.
/// \returns A 256-bit vector of [4 x i64] containing the sign-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepi32_epi64(__m128i __V) {
  return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
}

/// Zero-extends bytes from the 128-bit integer vector in \a __V and returns
///    the 16-bit values in the corresponding elements of a 256-bit vector
///    of [16 x i16].
///
/// \code{.operation}
/// FOR i := 0 TO 15
///   j := i*8
///   k := i*16
///   result[k+15:k] := ZeroExtend(__V[j+7:j])
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMOVZXBW instruction.
///
/// \param __V
///    A 128-bit integer vector containing the source bytes.
/// \returns A 256-bit vector of [16 x i16] containing the zero-extended
///    values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepu8_epi16(__m128i __V) {
  /* The source is viewed as unsigned bytes (__v16qu), so widening
     zero-extends. */
  return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
}

/// Zero-extends bytes from the lower half of the 128-bit integer vector in
///    \a __V and returns the 32-bit values in the corresponding elements of a
///    256-bit vector of [8 x i32].
/// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*8 /// k := i*32 /// result[k+31:k] := ZeroExtend(__V[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXBD instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } /// Zero-extends the first four bytes from the 128-bit integer vector in /// \a __V and returns the 64-bit values in the corresponding elements of a /// 256-bit vector of [4 x i64]. /// /// \code{.operation} /// result[63:0] := ZeroExtend(__V[7:0]) /// result[127:64] := ZeroExtend(__V[15:8]) /// result[191:128] := ZeroExtend(__V[23:16]) /// result[255:192] := ZeroExtend(__V[31:24]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXBQ instruction. /// /// \param __V /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } /// Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in /// \a __V and returns the 32-bit values in the corresponding elements of a /// 256-bit vector of [8 x i32]. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*16 /// k := i*32 /// result[k+31:k] := ZeroExtend(__V[j+15:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16] containing the source values. 
/// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } /// Zero-extends 16-bit elements from the lower half of the 128-bit vector of /// [8 x i16] in \a __V and returns the 64-bit values in the corresponding /// elements of a 256-bit vector of [4 x i64]. /// /// \code{.operation} /// result[63:0] := ZeroExtend(__V[15:0]) /// result[127:64] := ZeroExtend(__V[31:16]) /// result[191:128] := ZeroExtend(__V[47:32]) /// result[255:192] := ZeroExtend(__V[64:48]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVSXWQ instruction. /// /// \param __V /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } /// Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in /// \a __V and returns the 64-bit values in the corresponding elements of a /// 256-bit vector of [4 x i64]. /// /// \code{.operation} /// result[63:0] := ZeroExtend(__V[31:0]) /// result[127:64] := ZeroExtend(__V[63:32]) /// result[191:128] := ZeroExtend(__V[95:64]) /// result[255:192] := ZeroExtend(__V[127:96]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMOVZXDQ instruction. /// /// \param __V /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtepu32_epi64(__m128i __V) {
  /* The source is viewed as unsigned 32-bit elements (__v4su), so widening
     zero-extends. */
  return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
}

/// Multiplies signed 32-bit integers from even-numbered elements of two
///    256-bit vectors of [8 x i32] and returns the 64-bit products in the
///    [4 x i64] result.
///
/// \code{.operation}
/// result[63:0] := __a[31:0] * __b[31:0]
/// result[127:64] := __a[95:64] * __b[95:64]
/// result[191:128] := __a[159:128] * __b[159:128]
/// result[255:192] := __a[223:192] * __b[223:192]
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMULDQ instruction.
///
/// \param __a
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mul_epi32(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}

/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
///    [16 x i16], truncates the 32-bit results to the most significant 18
///    bits, rounds by adding 1, and returns bits [16:1] of each rounded
///    product in the [16 x i16] result.
///
/// \code{.operation}
/// FOR i := 0 TO 15
///   j := i*16
///   temp := ((__a[j+15:j] * __b[j+15:j]) >> 14) + 1
///   result[j+15:j] := temp[16:1]
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMULHRSW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the rounded products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies unsigned 16-bit integer elements of two 256-bit vectors of
///    [16 x i16], and returns the upper 16 bits of each 32-bit product in the
///    [16 x i16] result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMULHUW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mulhi_epu16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
///    [16 x i16], and returns the upper 16 bits of each 32-bit product in the
///    [16 x i16] result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMULHW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mulhi_epi16(__m256i __a, __m256i __b)
{
  return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
}

/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
///    [16 x i16], and returns the lower 16 bits of each 32-bit product in the
///    [16 x i16] result.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPMULLW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \param __b
///    A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); } /// Multiplies signed 32-bit integer elements of two 256-bit vectors of /// [8 x i32], and returns the lower 32 bits of each 64-bit product in the /// [8 x i32] result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMULLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the products. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32 (__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a * (__v8su)__b); } /// Multiplies unsigned 32-bit integers from even-numered elements of two /// 256-bit vectors of [8 x i32] and returns the 64-bit products in the /// [4 x i64] result. /// /// \code{.operation} /// result[63:0] := __a[31:0] * __b[31:0] /// result[127:64] := __a[95:64] * __b[95:64] /// result[191:128] := __a[159:128] * __b[159:128] /// result[255:192] := __a[223:192] * __b[223:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMULUDQ instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the products. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } /// Computes the bitwise OR of the 256-bit integer vectors in \a __a and /// \a __b. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPOR instruction. /// /// \param __a /// A 256-bit integer vector. 
/// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); } /// Computes four sum of absolute difference (SAD) operations on sets of eight /// unsigned 8-bit integers from the 256-bit integer vectors \a __a and /// \a __b. /// /// One SAD result is computed for each set of eight bytes from \a __a and /// eight bytes from \a __b. The zero-extended SAD value is returned in the /// corresponding 64-bit element of the result. /// /// A single SAD operation takes the differences between the corresponding /// bytes of \a __a and \a __b, takes the absolute value of each difference, /// and sums these eight values to form one 16-bit result. This operation /// is repeated four times with successive sets of eight bytes. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// temp0 := ABS(__a[j+7:j] - __b[j+7:j]) /// temp1 := ABS(__a[j+15:j+8] - __b[j+15:j+8]) /// temp2 := ABS(__a[j+23:j+16] - __b[j+23:j+16]) /// temp3 := ABS(__a[j+31:j+24] - __b[j+31:j+24]) /// temp4 := ABS(__a[j+39:j+32] - __b[j+39:j+32]) /// temp5 := ABS(__a[j+47:j+40] - __b[j+47:j+40]) /// temp6 := ABS(__a[j+55:j+48] - __b[j+55:j+48]) /// temp7 := ABS(__a[j+63:j+56] - __b[j+63:j+56]) /// result[j+15:j] := temp0 + temp1 + temp2 + temp3 + /// temp4 + temp5 + temp6 + temp7 /// result[j+63:j+16] := 0 /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSADBW instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sad_epu8(__m256i __a, __m256i __b) { return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); } /// Shuffles 8-bit integers in the 256-bit integer vector \a __a according /// to control information in the 256-bit integer vector \a __b, and /// returns the 256-bit result. In effect there are two separate 128-bit /// shuffles in the lower and upper halves. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// IF __b[j+7] == 1 /// result[j+7:j] := 0 /// ELSE /// k := __b[j+3:j] * 8 /// IF i > 15 /// k := k + 128 /// FI /// result[j+7:j] := __a[k+7:k] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSHUFB instruction. /// /// \param __a /// A 256-bit integer vector containing source values. /// \param __b /// A 256-bit integer vector containing control information to determine /// what goes into the corresponding byte of the result. If bit 7 of the /// control byte is 1, the result byte is 0; otherwise, bits 3:0 of the /// control byte specify the index (within the same 128-bit half) of \a __a /// to copy to the result byte. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); } /// Shuffles 32-bit integers from the 256-bit vector of [8 x i32] in \a a /// according to control information in the integer literal \a imm, and /// returns the 256-bit result. In effect there are two parallel 128-bit /// shuffles in the lower and upper halves. 
/// /// \code{.operation} /// FOR i := 0 to 3 /// j := i*32 /// k := (imm >> i*2)[1:0] * 32 /// result[j+31:j] := a[k+31:k] /// result[128+j+31:128+j] := a[128+k+31:128+k] /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_shuffle_epi32(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSHUFB instruction. /// /// \param a /// A 256-bit vector of [8 x i32] containing source values. /// \param imm /// An immediate 8-bit value specifying which elements to copy from \a a. /// \a imm[1:0] specifies the index in \a a for elements 0 and 4 of the /// result, \a imm[3:2] specifies the index for elements 1 and 5, and so /// forth. /// \returns A 256-bit vector of [8 x i32] containing the result. #define _mm256_shuffle_epi32(a, imm) \ ((__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))) /// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] in \a a /// according to control information in the integer literal \a imm, and /// returns the 256-bit result. The upper 64 bits of each 128-bit half /// are shuffled in parallel; the lower 64 bits of each 128-bit half are /// copied from \a a unchanged. /// /// \code{.operation} /// result[63:0] := a[63:0] /// result[191:128] := a[191:128] /// FOR i := 0 TO 3 /// j := i * 16 + 64 /// k := (imm >> i*2)[1:0] * 16 + 64 /// result[j+15:j] := a[k+15:k] /// result[128+j+15:128+j] := a[128+k+15:128+k] /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_shufflehi_epi16(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSHUFHW instruction. /// /// \param a /// A 256-bit vector of [16 x i16] containing source values. /// \param imm /// An immediate 8-bit value specifying which elements to copy from \a a. /// \a imm[1:0] specifies the index in \a a for elements 4 and 8 of the /// result, \a imm[3:2] specifies the index for elements 5 and 9, and so /// forth. 
///    Indexes are offset by 4 (so 0 means index 4, and so forth).
/// \returns A 256-bit vector of [16 x i16] containing the result.
#define _mm256_shufflehi_epi16(a, imm) \
  ((__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm)))

/// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] \a a
///    according to control information in the integer literal \a imm, and
///    returns the 256-bit [16 x i16] result. The lower 64 bits of each
///    128-bit half are shuffled; the upper 64 bits of each 128-bit half are
///    copied from \a a unchanged.
///
/// \code{.operation}
/// result[127:64] := a[127:64]
/// result[255:192] := a[255:192]
/// FOR i := 0 TO 3
///   j := i * 16
///   k := (imm >> i*2)[1:0] * 16
///   result[j+15:j] := a[k+15:k]
///   result[128+j+15:128+j] := a[128+k+15:128+k]
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m256i _mm256_shufflelo_epi16(__m256i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VPSHUFLW instruction.
///
/// \param a
///    A 256-bit vector of [16 x i16] to use as a source of data for the
///    result.
/// \param imm
///    An immediate 8-bit value specifying which elements to copy from \a a.
///    \a imm[1:0] specifies the index in \a a for elements 0 and 8 of the
///    result, \a imm[3:2] specifies the index for elements 1 and 9, and so
///    forth.
/// \returns A 256-bit vector of [16 x i16] containing the result.
#define _mm256_shufflelo_epi16(a, imm) \
  ((__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm)))

/// Sets each byte of the result to the corresponding byte of the 256-bit
///    integer vector in \a __a, the negative of that byte, or zero, depending
///    on whether the corresponding byte of the 256-bit integer vector in
///    \a __b is greater than zero, less than zero, or equal to zero,
///    respectively.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPSIGNB instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \param __b /// A 256-bit integer vector]. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } /// Sets each element of the result to the corresponding element of the /// 256-bit vector of [16 x i16] in \a __a, the negative of that element, /// or zero, depending on whether the corresponding element of the 256-bit /// vector of [16 x i16] in \a __b is greater than zero, less than zero, or /// equal to zero, respectively. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16]. /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } /// Sets each element of the result to the corresponding element of the /// 256-bit vector of [8 x i32] in \a __a, the negative of that element, or /// zero, depending on whether the corresponding element of the 256-bit /// vector of [8 x i32] in \a __b is greater than zero, less than zero, or /// equal to zero, respectively. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGND instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } /// Shifts each 128-bit half of the 256-bit integer vector \a a left by /// \a imm bytes, shifting in zero bytes, and returns the result. If \a imm /// is greater than 15, the returned result is all zeroes. 
/// /// \headerfile /// /// \code /// __m256i _mm256_slli_si256(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSLLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_slli_si256(a, imm) \ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))) /// Shifts each 128-bit half of the 256-bit integer vector \a a left by /// \a imm bytes, shifting in zero bytes, and returns the result. If \a imm /// is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_bslli_epi128(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSLLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_bslli_epi128(a, imm) \ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))) /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// left by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_slli_epi16(__m256i __a, int __count)
{
  return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
}

/// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
///    left by the number of bits specified by the lower 64 bits of \a __count,
///    shifting in zero bits, and returns the result. If \a __count is greater
///    than 15, the returned result is all zeroes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPSLLW instruction.
///
/// \param __a
///    A 256-bit vector of [16 x i16] to be shifted.
/// \param __count
///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
///    shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sll_epi16(__m256i __a, __m128i __count)
{
  return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
}

/// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
///    left by \a __count bits, shifting in zero bits, and returns the result.
///    If \a __count is greater than 31, the returned result is all zeroes.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the \c VPSLLD instruction.
///
/// \param __a
///    A 256-bit vector of [8 x i32] to be shifted.
/// \param __count
///    An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_slli_epi32(__m256i __a, int __count)
{
  return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
}

/// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a
///    left by the number of bits given in the lower 64 bits of \a __count,
///    shifting in zero bits, and returns the result. If \a __count is greater
///    than 31, the returned result is all zeroes.
/// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// left by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// left by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by \a __count bits, shifting in sign bits, and returns the result. /// If \a __count is greater than 15, each element of the result is either /// 0 or -1 according to the corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in sign bits, and returns the result. If \a __count is greater /// than 15, each element of the result is either 0 or -1 according to the /// corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by \a __count bits, shifting in sign bits, and returns the result. 
/// If \a __count is greater than 31, each element of the result is either /// 0 or -1 according to the corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in sign bits, and returns the result. If \a __count is greater /// than 31, each element of the result is either 0 or -1 according to the /// corresponding input sign bit. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); } /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by /// \a imm bytes, shifting in zero bytes, and returns the result. If /// \a imm is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_srli_si256(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSRLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. 
/// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_srli_si256(a, imm) \ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))) /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by /// \a imm bytes, shifting in zero bytes, and returns the result. If /// \a imm is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// \code /// __m256i _mm256_bsrli_epi128(__m256i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the \c VPSRLDQ instruction. /// /// \param a /// A 256-bit integer vector to be shifted. /// \param imm /// An unsigned immediate value specifying the shift count (in bytes). /// \returns A 256-bit integer vector containing the result. #define _mm256_bsrli_epi128(a, imm) \ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))) /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 15, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 15, the returned result is all zeroes. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 31, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 31, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [8 x i32] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// right by \a __count bits, shifting in zero bits, and returns the result. /// If \a __count is greater than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __a /// right by the number of bits given in the lower 64 bits of \a __count, /// shifting in zero bits, and returns the result. If \a __count is greater /// than 63, the returned result is all zeroes. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] to be shifted. /// \param __count /// A 128-bit vector of [2 x i64] whose lower element gives the unsigned /// shift count (in bits). The upper element is ignored. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256((__v4di)__a, __count); } /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer /// vectors. Returns the lower 8 bits of each difference in the /// corresponding byte of the 256-bit integer vector result (overflow is /// ignored). 
/// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := __a[j+7:j] - __b[j+7:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBB instruction. /// /// \param __a /// A 256-bit integer vector containing the minuends. /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } /// Subtracts 16-bit integers from corresponding elements of two 256-bit /// vectors of [16 x i16]. Returns the lower 16 bits of each difference in /// the corresponding element of the [16 x i16] result (overflow is /// ignored). /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := __a[j+15:j] - __b[j+15:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing the minuends. /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } /// Subtracts 32-bit integers from corresponding elements of two 256-bit /// vectors of [8 x i32]. Returns the lower 32 bits of each difference in /// the corresponding element of the [8 x i32] result (overflow is ignored). /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// result[j+31:j] := __a[j+31:j] - __b[j+31:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing the minuends. /// \param __b /// A 256-bit vector of [8 x i32] containing the subtrahends. 
/// \returns A 256-bit vector of [8 x i32] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } /// Subtracts 64-bit integers from corresponding elements of two 256-bit /// vectors of [4 x i64]. Returns the lower 64 bits of each difference in /// the corresponding element of the [4 x i64] result (overflow is ignored). /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// result[j+63:j] := __a[j+63:j] - __b[j+63:j] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] containing the minuends. /// \param __b /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using signed saturation, and returns each differences in the /// corresponding byte of the 256-bit integer vector result. /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := SATURATE8(__a[j+7:j] - __b[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBSB instruction. /// /// \param __a /// A 256-bit integer vector containing the minuends. /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b); } /// Subtracts 16-bit integers from corresponding elements of two 256-bit /// vectors of [16 x i16] using signed saturation, and returns each /// difference in the corresponding element of the [16 x i16] result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+7:j] := SATURATE16(__a[j+7:j] - __b[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing the minuends. /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b); } /// Subtracts 8-bit integers from corresponding bytes of two 256-bit integer /// vectors using unsigned saturation, and returns each difference in the /// corresponding byte of the 256-bit integer vector result. For each byte, /// computes result = __a - __b . /// /// \code{.operation} /// FOR i := 0 TO 31 /// j := i*8 /// result[j+7:j] := SATURATE8U(__a[j+7:j] - __b[j+7:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBUSB instruction. /// /// \param __a /// A 256-bit integer vector containing the minuends. /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b); } /// Subtracts 16-bit integers from corresponding elements of two 256-bit /// vectors of [16 x i16] using unsigned saturation, and returns each /// difference in the corresponding element of the [16 x i16] result. /// /// \code{.operation} /// FOR i := 0 TO 15 /// j := i*16 /// result[j+15:j] := SATURATE16U(__a[j+15:j] - __b[j+15:j]) /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSUBUSW instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] containing the minuends. /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b); } /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer /// vectors in \a __a and \a __b to form the 256-bit result. Specifically, /// uses the upper 64 bits of each 128-bit half of \a __a and \a __b as /// input; other bits in these parameters are ignored. /// /// \code{.operation} /// result[7:0] := __a[71:64] /// result[15:8] := __b[71:64] /// result[23:16] := __a[79:72] /// result[31:24] := __b[79:72] /// . . . /// result[127:120] := __b[127:120] /// result[135:128] := __a[199:192] /// . . . /// result[255:248] := __b[255:248] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHBW instruction. /// /// \param __a /// A 256-bit integer vector used as the source for the even-numbered bytes /// of the result. /// \param __b /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors /// of [16 x i16] in \a __a and \a __b to return the resulting 256-bit /// vector of [16 x i16]. Specifically, uses the upper 64 bits of each /// 128-bit half of \a __a and \a __b as input; other bits in these /// parameters are ignored. /// /// \code{.operation} /// result[15:0] := __a[79:64] /// result[31:16] := __b[79:64] /// result[47:32] := __a[95:80] /// result[63:48] := __b[95:80] /// . . . /// result[127:112] := __b[127:112] /// result[143:128] := __a[211:196] /// . . . /// result[255:240] := __b[255:240] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHWD instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors /// of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector /// of [8 x i32]. Specifically, uses the upper 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. 
/// /// \code{.operation} /// result[31:0] := __a[95:64] /// result[63:32] := __b[95:64] /// result[95:64] := __a[127:96] /// result[127:96] := __b[127:96] /// result[159:128] := __a[223:192] /// result[191:160] := __b[223:192] /// result[223:192] := __a[255:224] /// result[255:224] := __b[255:224] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHDQ instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors /// of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector /// of [4 x i64]. Specifically, uses the upper 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[63:0] := __a[127:64] /// result[127:64] := __b[127:64] /// result[191:128] := __a[255:192] /// result[255:192] := __b[255:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKHQDQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } /// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer /// vectors in \a __a and \a __b to form the 256-bit result. Specifically, /// uses the lower 64 bits of each 128-bit half of \a __a and \a __b as /// input; other bits in these parameters are ignored. /// /// \code{.operation} /// result[7:0] := __a[7:0] /// result[15:8] := __b[7:0] /// result[23:16] := __a[15:8] /// result[31:24] := __b[15:8] /// . . . /// result[127:120] := __b[63:56] /// result[135:128] := __a[135:128] /// . . . /// result[255:248] := __b[191:184] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLBW instruction. /// /// \param __a /// A 256-bit integer vector used as the source for the even-numbered bytes /// of the result. /// \param __b /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } /// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors /// of [16 x i16] in \a __a and \a __b to return the resulting 256-bit /// vector of [16 x i16]. Specifically, uses the lower 64 bits of each /// 128-bit half of \a __a and \a __b as input; other bits in these /// parameters are ignored. /// /// \code{.operation} /// result[15:0] := __a[15:0] /// result[31:16] := __b[15:0] /// result[47:32] := __a[31:16] /// result[63:48] := __b[31:16] /// . . . /// result[127:112] := __b[63:48] /// result[143:128] := __a[143:128] /// . 
. . /// result[255:239] := __b[191:176] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLWD instruction. /// /// \param __a /// A 256-bit vector of [16 x i16] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } /// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors /// of [8 x i32] in \a __a and \a __b to return the resulting 256-bit vector /// of [8 x i32]. Specifically, uses the lower 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[31:0] := __a[31:0] /// result[63:32] := __b[31:0] /// result[95:64] := __a[63:32] /// result[127:96] := __b[63:32] /// result[159:128] := __a[159:128] /// result[191:160] := __b[159:128] /// result[223:192] := __a[191:160] /// result[255:224] := __b[191:190] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLDQ instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } /// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors /// of [4 x i64] in \a __a and \a __b to return the resulting 256-bit vector /// of [4 x i64]. Specifically, uses the lower 64 bits of each 128-bit half /// of \a __a and \a __b as input; other bits in these parameters are /// ignored. /// /// \code{.operation} /// result[63:0] := __a[63:0] /// result[127:64] := __b[63:0] /// result[191:128] := __a[191:128] /// result[255:192] := __b[191:128] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPUNPCKLQDQ instruction. /// /// \param __a /// A 256-bit vector of [4 x i64] used as the source for the even-numbered /// elements of the result. /// \param __b /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } /// Computes the bitwise XOR of the 256-bit integer vectors in \a __a and /// \a __b. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPXOR instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); } /// Loads the 256-bit integer vector from memory \a __V using a non-temporal /// memory hint and returns the vector. \a __V must be aligned on a 32-byte /// boundary. /// /// \headerfile /// /// This intrinsic corresponds to the \c VMOVNTDQA instruction. 
/// /// \param __V /// A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_stream_load_si256(__m256i const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); } /// Broadcasts the 32-bit floating-point value from the low element of the /// 128-bit vector of [4 x float] in \a __X to all elements of the result's /// 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTSS instruction. /// /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } /// Broadcasts the 64-bit floating-point value from the low element of the /// 128-bit vector of [2 x double] in \a __a to both elements of the /// result's 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the \c MOVDDUP instruction. /// /// \param __a /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } /// Broadcasts the 32-bit floating-point value from the low element of the /// 128-bit vector of [4 x float] in \a __X to all elements of the /// result's 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTSS instruction. /// /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. 
/// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the 64-bit floating-point value from the low element of the /// 128-bit vector of [2 x double] in \a __X to all elements of the /// result's 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTSD instruction. /// /// \param __X /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } /// Broadcasts the 128-bit integer data from \a __X to both the lower and /// upper halves of the 256-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VBROADCASTI128 instruction. /// /// \param __X /// A 128-bit integer vector to be broadcast. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } #define _mm_broadcastsi128_si256(X) _mm256_broadcastsi128_si256(X) /// Merges 32-bit integer elements from either of the two 128-bit vectors of /// [4 x i32] in \a V1 or \a V2 to the result's 128-bit vector of [4 x i32], /// as specified by the immediate integer operand \a M. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// IF M[i] == 0 /// result[31+j:j] := V1[31+j:j] /// ELSE /// result[31+j:j] := V2[32+j:j] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_blend_epi32(__m128i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPBLENDDD instruction. 
/// /// \param V1 /// A 128-bit vector of [4 x i32] containing source values. /// \param V2 /// A 128-bit vector of [4 x i32] containing source values. /// \param M /// An immediate 8-bit integer operand, with bits [3:0] specifying the /// source for each element of the result. The position of the mask bit /// corresponds to the index of a copied value. When a mask bit is 0, the /// element is copied from \a V1; otherwise, it is copied from \a V2. /// \returns A 128-bit vector of [4 x i32] containing the result. #define _mm_blend_epi32(V1, V2, M) \ ((__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \ (__v4si)(__m128i)(V2), (int)(M))) /// Merges 32-bit integer elements from either of the two 256-bit vectors of /// [8 x i32] in \a V1 or \a V2 to return a 256-bit vector of [8 x i32], /// as specified by the immediate integer operand \a M. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// IF M[i] == 0 /// result[31+j:j] := V1[31+j:j] /// ELSE /// result[31+j:j] := V2[32+j:j] /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_blend_epi32(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPBLENDDD instruction. /// /// \param V1 /// A 256-bit vector of [8 x i32] containing source values. /// \param V2 /// A 256-bit vector of [8 x i32] containing source values. /// \param M /// An immediate 8-bit integer operand, with bits [7:0] specifying the /// source for each element of the result. The position of the mask bit /// corresponds to the index of a copied value. When a mask bit is 0, the /// element is copied from \a V1; otherwise, it is is copied from \a V2. /// \returns A 256-bit vector of [8 x i32] containing the result. #define _mm256_blend_epi32(V1, V2, M) \ ((__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), (int)(M))) /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all /// bytes of the 256-bit result. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTB instruction. /// /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 256-bit integer vector containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [8 x i16] in \a __X /// to all elements of the result's 256-bit vector of [16 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTW instruction. /// /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X /// to all elements of the result's 256-bit vector of [8 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X /// to all elements of the result's 256-bit vector of [4 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. 
/// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } /// Broadcasts the low byte from the 128-bit integer vector in \a __X to all /// bytes of the 128-bit result. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTB instruction. /// /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 128-bit integer vector containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [8 x i16] in /// \a __X to all elements of the result's 128-bit vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTW instruction. /// /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 128-bit vector of [8 x i16] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X /// to all elements of the result's vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x i32] containing the result. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } /// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X /// to both elements of the result's 128-bit vector of [2 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPBROADCASTQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x i64] containing the result. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } /// Sets the result's 256-bit vector of [8 x i32] to copies of elements of the /// 256-bit vector of [8 x i32] in \a __a as specified by indexes in the /// elements of the 256-bit vector of [8 x i32] in \a __b. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// k := __b[j+2:j] * 32 /// result[j+31:j] := __a[k+31:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPERMD instruction. /// /// \param __a /// A 256-bit vector of [8 x i32] containing the source values. /// \param __b /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); } /// Sets the result's 256-bit vector of [4 x double] to copies of elements of /// the 256-bit vector of [4 x double] in \a V as specified by the /// immediate value \a M. 
/// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// k := (M >> i*2)[1:0] * 64 /// result[j+63:j] := V[k+63:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_permute4x64_pd(__m256d V, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPERMPD instruction. /// /// \param V /// A 256-bit vector of [4 x double] containing the source values. /// \param M /// An immediate 8-bit value specifying which elements to copy from \a V. /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x double] containing the result. #define _mm256_permute4x64_pd(V, M) \ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))) /// Sets the result's 256-bit vector of [8 x float] to copies of elements of /// the 256-bit vector of [8 x float] in \a __a as specified by indexes in /// the elements of the 256-bit vector of [8 x i32] in \a __b. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// k := __b[j+2:j] * 32 /// result[j+31:j] := __a[k+31:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPERMPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the source values. /// \param __b /// A 256-bit vector of [8 x i32] containing indexes of values to use from /// \a __a. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); } /// Sets the result's 256-bit vector of [4 x i64] result to copies of elements /// of the 256-bit vector of [4 x i64] in \a V as specified by the /// immediate value \a M. 
/// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// k := (M >> i*2)[1:0] * 64 /// result[j+63:j] := V[k+63:k] /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_permute4x64_epi64(__m256i V, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPERMQ instruction. /// /// \param V /// A 256-bit vector of [4 x i64] containing the source values. /// \param M /// An immediate 8-bit value specifying which elements to copy from \a V. /// \a M[1:0] specifies the index in \a a for element 0 of the result, /// \a M[3:2] specifies the index for element 1, and so forth. /// \returns A 256-bit vector of [4 x i64] containing the result. #define _mm256_permute4x64_epi64(V, M) \ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))) /// Sets each half of the 256-bit result either to zero or to one of the /// four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2, /// as specified by the immediate value \a M. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*128 /// k := M >> (i*4) /// IF k[3] == 0 /// CASE (k[1:0]) OF /// 0: result[127+j:j] := V1[127:0] /// 1: result[127+j:j] := V1[255:128] /// 2: result[127+j:j] := V2[127:0] /// 3: result[127+j:j] := V2[255:128] /// ESAC /// ELSE /// result[127+j:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_permute2x128_si256(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VPERM2I128 instruction. /// /// \param V1 /// A 256-bit integer vector containing source values. /// \param V2 /// A 256-bit integer vector containing source values. /// \param M /// An immediate value specifying how to form the result. Bits [3:0] /// control the lower half of the result, bits [7:4] control the upper half. /// Within each 4-bit control value, if bit 3 is 1, the result is zero, /// otherwise bits [1:0] determine the source as follows. 
\n /// 0: the lower half of \a V1 \n /// 1: the upper half of \a V1 \n /// 2: the lower half of \a V2 \n /// 3: the upper half of \a V2 /// \returns A 256-bit integer vector containing the result. #define _mm256_permute2x128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))) /// Extracts half of the 256-bit vector \a V to the 128-bit result. If bit 0 /// of the immediate \a M is zero, extracts the lower half of the result; /// otherwise, extracts the upper half. /// /// \headerfile /// /// \code /// __m128i _mm256_extracti128_si256(__m256i V, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VEXTRACTI128 instruction. /// /// \param V /// A 256-bit integer vector containing the source values. /// \param M /// An immediate value specifying which half of \a V to extract. /// \returns A 128-bit integer vector containing the result. #define _mm256_extracti128_si256(V, M) \ ((__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))) /// Copies the 256-bit vector \a V1 to the result, then overwrites half of the /// result with the 128-bit vector \a V2. If bit 0 of the immediate \a M /// is zero, overwrites the lower half of the result; otherwise, /// overwrites the upper half. /// /// \headerfile /// /// \code /// __m256i _mm256_inserti128_si256(__m256i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c VINSERTI128 instruction. /// /// \param V1 /// A 256-bit integer vector containing a source value. /// \param V2 /// A 128-bit integer vector containing a source value. /// \param M /// An immediate value specifying where to put \a V2 in the result. /// \returns A 256-bit integer vector containing the result. 
#define _mm256_inserti128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \ (__v2di)(__m128i)(V2), (int)(M))) /// Conditionally loads eight 32-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 256-bit [8 x i32] result. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// IF __M[j+31] == 1 /// result[j+31:j] := Load32(__X+(i*4)) /// ELSE /// result[j+31:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 256-bit vector of [8 x i32] containing the mask bits. /// \returns A 256-bit vector of [8 x i32] containing the loaded or zeroed /// elements. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi32(int const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); } /// Conditionally loads four 64-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 256-bit [4 x i64] result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// IF __M[j+63] == 1 /// result[j+63:j] := Load64(__X+(i*8)) /// ELSE /// result[j+63:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 256-bit vector of [4 x i64] containing the mask bits. /// \returns A 256-bit vector of [4 x i64] containing the loaded or zeroed /// elements. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi64(long long const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M); } /// Conditionally loads four 32-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 128-bit [4 x i32] result. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// IF __M[j+31] == 1 /// result[j+31:j] := Load32(__X+(i*4)) /// ELSE /// result[j+31:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 128-bit vector of [4 x i32] containing the mask bits. /// \returns A 128-bit vector of [4 x i32] containing the loaded or zeroed /// elements. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi32(int const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); } /// Conditionally loads two 64-bit integer elements from memory \a __X, if /// the most significant bit of the corresponding element in the mask /// \a __M is set; otherwise, sets that element of the result to zero. /// Returns the 128-bit [2 x i64] result. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*64 /// IF __M[j+63] == 1 /// result[j+63:j] := Load64(__X+(i*8)) /// ELSE /// result[j+63:j] := 0 /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for loading values. /// \param __M /// A 128-bit vector of [2 x i64] containing the mask bits. /// \returns A 128-bit vector of [2 x i64] containing the loaded or zeroed /// elements. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi64(long long const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); } /// Conditionally stores eight 32-bit integer elements from the 256-bit vector /// of [8 x i32] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 7 /// j := i*32 /// IF __M[j+31] == 1 /// Store32(__X+(i*4), __Y[j+31:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 256-bit vector of [8 x i32] containing the mask bits. /// \param __Y /// A 256-bit vector of [8 x i32] containing the values to store. static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } /// Conditionally stores four 64-bit integer elements from the 256-bit vector /// of [4 x i64] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*64 /// IF __M[j+63] == 1 /// Store64(__X+(i*8), __Y[j+63:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 256-bit vector of [4 x i64] containing the mask bits. /// \param __Y /// A 256-bit vector of [4 x i64] containing the values to store. 
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } /// Conditionally stores four 32-bit integer elements from the 128-bit vector /// of [4 x i32] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 3 /// j := i*32 /// IF __M[j+31] == 1 /// Store32(__X+(i*4), __Y[j+31:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVD instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 128-bit vector of [4 x i32] containing the mask bits. /// \param __Y /// A 128-bit vector of [4 x i32] containing the values to store. static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } /// Conditionally stores two 64-bit integer elements from the 128-bit vector /// of [2 x i64] in \a __Y to memory \a __X, if the most significant bit of /// the corresponding element in the mask \a __M is set; otherwise, the /// memory element is unchanged. /// /// \code{.operation} /// FOR i := 0 TO 1 /// j := i*64 /// IF __M[j+63] == 1 /// Store64(__X+(i*8), __Y[j+63:j]) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMASKMOVQ instruction. /// /// \param __X /// A pointer to the memory used for storing values. /// \param __M /// A 128-bit vector of [2 x i64] containing the mask bits. /// \param __Y /// A 128-bit vector of [2 x i64] containing the values to store. 
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X /// left by the number of bits given in the corresponding element of the /// 256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVD instruction. /// /// \param __X /// A 256-bit vector of [8 x i32] to be shifted. /// \param __Y /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); } /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X /// left by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] to be shifted. /// \param __Y /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X /// left by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVQ instruction. /// /// \param __X /// A 256-bit vector of [4 x i64] to be shifted. /// \param __Y /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); } /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X /// left by the number of bits given in the corresponding element of the /// 128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSLLVQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] to be shifted. /// \param __Y /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 256-bit vector of [8 x i32] in \a __Y, shifting in sign bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is 0 or -1 according to the sign bit /// for that element. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAVD instruction. /// /// \param __X /// A 256-bit vector of [8 x i32] to be shifted. /// \param __Y /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); } /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i32] in \a __Y, shifting in sign bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is 0 or -1 according to the sign bit /// for that element. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRAVD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] to be shifted. /// \param __Y /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); } /// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 256-bit vector of [8 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVD instruction. /// /// \param __X /// A 256-bit vector of [8 x i32] to be shifted. /// \param __Y /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); } /// Shifts each 32-bit element of the 128-bit vector of [4 x i32] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i32] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 31, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVD instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] to be shifted. /// \param __Y /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); } /// Shifts each 64-bit element of the 256-bit vector of [4 x i64] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [4 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVQ instruction. /// /// \param __X /// A 256-bit vector of [4 x i64] to be shifted. /// \param __Y /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); } /// Shifts each 64-bit element of the 128-bit vector of [2 x i64] in \a __X /// right by the number of bits given in the corresponding element of the /// 128-bit vector of [2 x i64] in \a __Y, shifting in zero bits, and /// returns the result. If the shift count for any element is greater than /// 63, the result for that element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSRLVQ instruction. /// /// \param __X /// A 128-bit vector of [2 x i64] to be shifted. /// \param __Y /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); } /// Conditionally gathers two 64-bit floating-point values, either from the /// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector /// of [2 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_mask_i32gather_pd(__m128d a, const double *m, __m128i i, /// __m128d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param a /// A 128-bit vector of [2 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param mask /// A 128-bit vector of [2 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. 
#define _mm_mask_i32gather_pd(a, m, i, mask, s) \ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v2df)(__m128d)(mask), (s))) /// Conditionally gathers four 64-bit floating-point values, either from the /// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector /// of [4 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_mask_i32gather_pd(__m256d a, const double *m, __m128i i, /// __m256d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param a /// A 256-bit vector of [4 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [4 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. 
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v4df)(__m256d)(mask), (s))) /// Conditionally gathers two 64-bit floating-point values, either from the /// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector /// of [2 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_mask_i64gather_pd(__m128d a, const double *m, __m128i i, /// __m128d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param a /// A 128-bit vector of [2 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [2 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. 
#define _mm_mask_i64gather_pd(a, m, i, mask, s) \ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ (__v2df)(__m128d)(mask), (s))) /// Conditionally gathers four 64-bit floating-point values, either from the /// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector /// of [4 x double] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// IF mask[j+63] == 0 /// result[j+63:j] := a[j+63:j] /// ELSE /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_mask_i64gather_pd(__m256d a, const double *m, __m256i i, /// __m256d mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param a /// A 256-bit vector of [4 x double] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [4 x double] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. 
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ (__v4df)(__m256d)(mask), (s))) /// Conditionally gathers four 32-bit floating-point values, either from the /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector /// of [4 x float] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*32 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_mask_i32gather_ps(__m128 a, const float *m, __m128i i, /// __m128 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param a /// A 128-bit vector of [4 x float] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
#define _mm_mask_i32gather_ps(a, m, i, mask, s) \ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ (__v4sf)(__m128)(mask), (s))) /// Conditionally gathers eight 32-bit floating-point values, either from the /// 256-bit vector of [8 x float] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector /// of [8 x float] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 7 /// j := element*32 /// k := element*32 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256 _mm256_mask_i32gather_ps(__m256 a, const float *m, __m256i i, /// __m256 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param a /// A 256-bit vector of [8 x float] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [8 x i32] containing signed indexes into \a m. /// \param mask /// A 256-bit vector of [8 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [8 x float] containing the gathered values. 
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ (__v8sf)(__m256)(mask), (s))) /// Conditionally gathers two 32-bit floating-point values, either from the /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector /// of [4 x float] in \a mask determines the source for the lower two /// elements. The upper two elements of the result are zeroed. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*32 /// k := element*64 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// result[127:64] := 0 /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_mask_i64gather_ps(__m128 a, const float *m, __m128i i, /// __m128 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param a /// A 128-bit vector of [4 x float] used as the source when a mask bit is /// zero. Only the first two elements are used. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. Only the first /// two elements are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
#define _mm_mask_i64gather_ps(a, m, i, mask, s) \ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \ (float const *)(m), \ (__v2di)(__m128i)(i), \ (__v4sf)(__m128)(mask), (s))) /// Conditionally gathers four 32-bit floating-point values, either from the /// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector /// of [4 x float] in \a mask determines the source for each element. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*64 /// IF mask[j+31] == 0 /// result[j+31:j] := a[j+31:j] /// ELSE /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// FI /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm256_mask_i64gather_ps(__m128 a, const float *m, __m256i i, /// __m128 mask, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param a /// A 128-bit vector of [4 x float] used as the source when a mask bit is /// zero. /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param mask /// A 128-bit vector of [4 x float] containing the mask. The most /// significant bit of each element in the mask vector represents the mask /// bits. If a mask bit is zero, the corresponding value from vector \a a /// is gathered; otherwise the value is loaded from memory. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \
  ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
                                        (float const *)(m), \
                                        (__v4di)(__m256i)(i), \
                                        (__v4sf)(__m128)(mask), (s)))

/// Conditionally gathers four 32-bit integer values, either from the
///    128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
///    indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
///    of [4 x i32] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 3
///   j := element*32
///   k := element*32
///   IF mask[j+31] == 0
///     result[j+31:j] := a[j+31:j]
///   ELSE
///     result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_mask_i32gather_epi32(__m128i a, const int *m, __m128i i,
///                                  __m128i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERDD instruction.
///
/// \param a
///    A 128-bit vector of [4 x i32] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
/// \param mask
///    A 128-bit vector of [4 x i32] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \
  ((__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
                                     (int const *)(m), \
                                     (__v4si)(__m128i)(i), \
                                     (__v4si)(__m128i)(mask), (s)))

/// Conditionally gathers eight 32-bit integer values, either from the
///    256-bit vector of [8 x i32] in \a a, or from memory \a m using scaled
///    indexes from the 256-bit vector of [8 x i32] in \a i. The 256-bit vector
///    of [8 x i32] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 7
///   j := element*32
///   k := element*32
///   IF mask[j+31] == 0
///     result[j+31:j] := a[j+31:j]
///   ELSE
///     result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m256i _mm256_mask_i32gather_epi32(__m256i a, const int *m, __m256i i,
///                                     __m256i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERDD instruction.
///
/// \param a
///    A 256-bit vector of [8 x i32] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 256-bit vector of [8 x i32] containing signed indexes into \a m.
/// \param mask
///    A 256-bit vector of [8 x i32] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 256-bit vector of [8 x i32] containing the gathered values.
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \
  ((__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
                                        (int const *)(m), \
                                        (__v8si)(__m256i)(i), \
                                        (__v8si)(__m256i)(mask), (s)))

/// Conditionally gathers two 32-bit integer values, either from the
///    128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
///    indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
///    of [4 x i32] in \a mask determines the source for the lower two
///    elements. The upper two elements of the result are zeroed.
///
/// \code{.operation}
/// FOR element := 0 to 1
///   j := element*32
///   k := element*64
///   IF mask[j+31] == 0
///     result[j+31:j] := a[j+31:j]
///   ELSE
///     result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s)
///   FI
/// ENDFOR
/// result[127:64] := 0
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_mask_i64gather_epi32(__m128i a, const int *m, __m128i i,
///                                  __m128i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERQD instruction.
///
/// \param a
///    A 128-bit vector of [4 x i32] used as the source when a mask bit is
///    zero. Only the first two elements are used.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
/// \param mask
///    A 128-bit vector of [4 x i32] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory. Only the first two elements
///    are used.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \
  ((__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
                                     (int const *)(m), \
                                     (__v2di)(__m128i)(i), \
                                     (__v4si)(__m128i)(mask), (s)))

/// Conditionally gathers four 32-bit integer values, either from the
///    128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
///    indexes from the 256-bit vector of [4 x i64] in \a i. The 128-bit vector
///    of [4 x i32] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 3
///   j := element*32
///   k := element*64
///   IF mask[j+31] == 0
///     result[j+31:j] := a[j+31:j]
///   ELSE
///     result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm256_mask_i64gather_epi32(__m128i a, const int *m, __m256i i,
///                                     __m128i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERQD instruction.
///
/// \param a
///    A 128-bit vector of [4 x i32] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
/// \param mask
///    A 128-bit vector of [4 x i32] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \
  ((__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
                                        (int const *)(m), \
                                        (__v4di)(__m256i)(i), \
                                        (__v4si)(__m128i)(mask), (s)))

/// Conditionally gathers two 64-bit integer values, either from the
///    128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
///    indexes from the 128-bit vector of [4 x i32] in \a i. The 128-bit vector
///    of [2 x i64] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 1
///   j := element*64
///   k := element*32
///   IF mask[j+63] == 0
///     result[j+63:j] := a[j+63:j]
///   ELSE
///     result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_mask_i32gather_epi64(__m128i a, const long long *m, __m128i i,
///                                  __m128i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERDQ instruction.
///
/// \param a
///    A 128-bit vector of [2 x i64] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only
///    the first two elements are used.
/// \param mask
///    A 128-bit vector of [2 x i64] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x i64] containing the gathered values.
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \
  ((__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
                                     (long long const *)(m), \
                                     (__v4si)(__m128i)(i), \
                                     (__v2di)(__m128i)(mask), (s)))

/// Conditionally gathers four 64-bit integer values, either from the
///    256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
///    indexes from the 128-bit vector of [4 x i32] in \a i. The 256-bit vector
///    of [4 x i64] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 3
///   j := element*64
///   k := element*32
///   IF mask[j+63] == 0
///     result[j+63:j] := a[j+63:j]
///   ELSE
///     result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m256i _mm256_mask_i32gather_epi64(__m256i a, const long long *m,
///                                     __m128i i, __m256i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERDQ instruction.
///
/// \param a
///    A 256-bit vector of [4 x i64] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
/// \param mask
///    A 256-bit vector of [4 x i64] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x i64] containing the gathered values.
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \
  ((__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
                                        (long long const *)(m), \
                                        (__v4si)(__m128i)(i), \
                                        (__v4di)(__m256i)(mask), (s)))

/// Conditionally gathers two 64-bit integer values, either from the
///    128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
///    indexes from the 128-bit vector of [2 x i64] in \a i. The 128-bit vector
///    of [2 x i64] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 1
///   j := element*64
///   k := element*64
///   IF mask[j+63] == 0
///     result[j+63:j] := a[j+63:j]
///   ELSE
///     result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_mask_i64gather_epi64(__m128i a, const long long *m, __m128i i,
///                                  __m128i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERQQ instruction.
///
/// \param a
///    A 128-bit vector of [2 x i64] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 128-bit vector of [2 x i64] containing signed indexes into \a m.
/// \param mask
///    A 128-bit vector of [2 x i64] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x i64] containing the gathered values.
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \
  ((__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
                                     (long long const *)(m), \
                                     (__v2di)(__m128i)(i), \
                                     (__v2di)(__m128i)(mask), (s)))

/// Conditionally gathers four 64-bit integer values, either from the
///    256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
///    indexes from the 256-bit vector of [4 x i64] in \a i. The 256-bit vector
///    of [4 x i64] in \a mask determines the source for each element.
///
/// \code{.operation}
/// FOR element := 0 to 3
///   j := element*64
///   k := element*64
///   IF mask[j+63] == 0
///     result[j+63:j] := a[j+63:j]
///   ELSE
///     result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s)
///   FI
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m256i _mm256_mask_i64gather_epi64(__m256i a, const long long *m,
///                                     __m256i i, __m256i mask, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERQQ instruction.
///
/// \param a
///    A 256-bit vector of [4 x i64] used as the source when a mask bit is
///    zero.
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
/// \param mask
///    A 256-bit vector of [4 x i64] containing the mask. The most significant
///    bit of each element in the mask vector represents the mask bits. If a
///    mask bit is zero, the corresponding value from vector \a a is gathered;
///    otherwise the value is loaded from memory.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x i64] containing the gathered values.
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ (__v4di)(__m256i)(mask), (s))) /// Gathers two 64-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_i32gather_pd(const double *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. #define _mm_i32gather_pd(m, i, s) \ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ (s))) /// Gathers four 64-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_i32gather_pd(const double *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. 
/// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. #define _mm256_i32gather_pd(m, i, s) \ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4si)(__m128i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ (s))) /// Gathers two 64-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128d _mm_i64gather_pd(const double *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x double] containing the gathered values. #define _mm_i64gather_pd(m, i, s) \ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \ (double const *)(m), \ (__v2di)(__m128i)(i), \ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \ _mm_setzero_pd()), \ (s))) /// Gathers four 64-bit floating-point values from memory \a m using scaled /// indexes from the 256-bit vector of [4 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256d _mm256_i64gather_pd(const double *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPD instruction. 
/// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x double] containing the gathered values. #define _mm256_i64gather_pd(m, i, s) \ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \ (double const *)(m), \ (__v4di)(__m256i)(i), \ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \ _mm256_setzero_pd(), \ _CMP_EQ_OQ), \ (s))) /// Gathers four 32-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*32 /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_i32gather_ps(const float *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. #define _mm_i32gather_ps(m, i, s) \ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \ (float const *)(m), \ (__v4si)(__m128i)(i), \ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \ _mm_setzero_ps()), \ (s))) /// Gathers eight 32-bit floating-point values from memory \a m using scaled /// indexes from the 256-bit vector of [8 x i32] in \a i. 
/// /// \code{.operation} /// FOR element := 0 to 7 /// j := element*32 /// k := element*32 /// result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256 _mm256_i32gather_ps(const float *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERDPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [8 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [8 x float] containing the gathered values. #define _mm256_i32gather_ps(m, i, s) \ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \ (float const *)(m), \ (__v8si)(__m256i)(i), \ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \ _mm256_setzero_ps(), \ _CMP_EQ_OQ), \ (s))) /// Gathers two 32-bit floating-point values from memory \a m using scaled /// indexes from the 128-bit vector of [2 x i64] in \a i. The upper two /// elements of the result are zeroed. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*32 /// k := element*64 /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// result[127:64] := 0 /// \endcode /// /// \headerfile /// /// \code /// __m128 _mm_i64gather_ps(const float *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VGATHERQPS instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x float] containing the gathered values. 
#define _mm_i64gather_ps(m, i, s) \
  ((__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
                                     (float const *)(m), \
                                     (__v2di)(__m128i)(i), \
                                     (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
                                                          _mm_setzero_ps()), \
                                     (s)))

/// Gathers four 32-bit floating-point values from memory \a m using scaled
///    indexes from the 256-bit vector of [4 x i64] in \a i.
///
/// \code{.operation}
/// FOR element := 0 to 3
///   j := element*32
///   k := element*64
///   result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s)
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128 _mm256_i64gather_ps(const float *m, __m256i i, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VGATHERQPS instruction.
///
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 256-bit vector of [4 x i64] containing signed indexes into \a m.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
#define _mm256_i64gather_ps(m, i, s) \
  ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
                                        (float const *)(m), \
                                        (__v4di)(__m256i)(i), \
                                        (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
                                                             _mm_setzero_ps()), \
                                        (s)))

/// Gathers four 32-bit integer values from memory \a m using scaled
///    indexes from the 128-bit vector of [4 x i32] in \a i.
///
/// \code{.operation}
/// FOR element := 0 to 3
///   j := element*32
///   k := element*32
///   result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s)
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_i32gather_epi32(const int *m, __m128i i, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERDD instruction.
///
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 128-bit vector of [4 x i32] containing signed indexes into \a m.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
#define _mm_i32gather_epi32(m, i, s) \
  ((__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
                                     (int const *)(m), (__v4si)(__m128i)(i), \
                                     (__v4si)_mm_set1_epi32(-1), (s)))

/// Gathers eight 32-bit integer values from memory \a m using scaled
///    indexes from the 256-bit vector of [8 x i32] in \a i.
///
/// \code{.operation}
/// FOR element := 0 to 7
///   j := element*32
///   k := element*32
///   result[j+31:j] := Load32(m + SignExtend(i[k+31:k])*s)
/// ENDFOR
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m256i _mm256_i32gather_epi32(const int *m, __m256i i, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERDD instruction.
///
/// \param m
///    A pointer to the memory used for loading values.
/// \param i
///    A 256-bit vector of [8 x i32] containing signed indexes into \a m.
/// \param s
///    A literal constant scale factor for the indexes in \a i. Must be
///    1, 2, 4, or 8.
/// \returns A 256-bit vector of [8 x i32] containing the gathered values.
#define _mm256_i32gather_epi32(m, i, s) \
  ((__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(m), (__v8si)(__m256i)(i), \
                                        (__v8si)_mm256_set1_epi32(-1), (s)))

/// Gathers two 32-bit integer values from memory \a m using scaled indexes
///    from the 128-bit vector of [2 x i64] in \a i. The upper two elements
///    of the result are zeroed.
///
/// \code{.operation}
/// FOR element := 0 to 1
///   j := element*32
///   k := element*64
///   result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s)
/// ENDFOR
/// result[127:64] := 0
/// \endcode
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_i64gather_epi32(const int *m, __m128i i, const int s);
/// \endcode
///
/// This intrinsic corresponds to the \c VPGATHERQD instruction.
///
/// \param m
///    A pointer to the memory used for loading values.
/// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. #define _mm_i64gather_epi32(m, i, s) \ ((__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v2di)(__m128i)(i), \ (__v4si)_mm_set1_epi32(-1), (s))) /// Gathers four 32-bit integer values from memory \a m using scaled indexes /// from the 256-bit vector of [4 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*32 /// k := element*64 /// result[j+31:j] := Load32(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm256_i64gather_epi32(const int *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQD instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [4 x i32] containing the gathered values. #define _mm256_i64gather_epi32(m, i, s) \ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \ (int const *)(m), (__v4di)(__m256i)(i), \ (__v4si)_mm_set1_epi32(-1), (s))) /// Gathers two 64-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_i32gather_epi64(const long long *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDQ instruction. 
/// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. Only /// the first two elements are used. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x i64] containing the gathered values. #define _mm_i32gather_epi64(m, i, s) \ ((__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ (__v2di)_mm_set1_epi64x(-1), (s))) /// Gathers four 64-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [4 x i32] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*32 /// result[j+63:j] := Load64(m + SignExtend(i[k+31:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_i32gather_epi64(const long long *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERDQ instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [4 x i32] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x i64] containing the gathered values. #define _mm256_i32gather_epi64(m, i, s) \ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4si)(__m128i)(i), \ (__v4di)_mm256_set1_epi64x(-1), (s))) /// Gathers two 64-bit integer values from memory \a m using scaled indexes /// from the 128-bit vector of [2 x i64] in \a i. 
/// /// \code{.operation} /// FOR element := 0 to 1 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m128i _mm_i64gather_epi64(const long long *m, __m128i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQQ instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 128-bit vector of [2 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 128-bit vector of [2 x i64] containing the gathered values. #define _mm_i64gather_epi64(m, i, s) \ ((__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \ (long long const *)(m), \ (__v2di)(__m128i)(i), \ (__v2di)_mm_set1_epi64x(-1), (s))) /// Gathers four 64-bit integer values from memory \a m using scaled indexes /// from the 256-bit vector of [4 x i64] in \a i. /// /// \code{.operation} /// FOR element := 0 to 3 /// j := element*64 /// k := element*64 /// result[j+63:j] := Load64(m + SignExtend(i[k+63:k])*s) /// ENDFOR /// \endcode /// /// \headerfile /// /// \code /// __m256i _mm256_i64gather_epi64(const long long *m, __m256i i, const int s); /// \endcode /// /// This intrinsic corresponds to the \c VPGATHERQQ instruction. /// /// \param m /// A pointer to the memory used for loading values. /// \param i /// A 256-bit vector of [4 x i64] containing signed indexes into \a m. /// \param s /// A literal constant scale factor for the indexes in \a i. Must be /// 1, 2, 4, or 8. /// \returns A 256-bit vector of [4 x i64] containing the gathered values. 
#define _mm256_i64gather_epi64(m, i, s) \ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \ (long long const *)(m), \ (__v4di)(__m256i)(i), \ (__v4di)_mm256_set1_epi64x(-1), (s))) #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 #endif /* __AVX2INTRIN_H */ /*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVXIFMAINTRIN_H #define __AVXIFMAINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(256))) // must vex-encoding /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit /// unsigned integer from the intermediate result with the corresponding /// unsigned 64-bit integer in \a __X, and store the results in \a dst. /// /// \headerfile /// /// \code /// __m128i /// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) /// \endcode /// /// This intrinsic corresponds to the \c VPMADD52HUQ instruction. /// /// \return /// return __m128i dst. 
/// \param __X /// A 128-bit vector of [2 x i64] /// \param __Y /// A 128-bit vector of [2 x i64] /// \param __Z /// A 128-bit vector of [2 x i64] /// /// \code{.operation} /// FOR j := 0 to 1 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y, (__v2di)__Z); } /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit /// unsigned integer from the intermediate result with the corresponding /// unsigned 64-bit integer in \a __X, and store the results in \a dst. /// /// \headerfile /// /// \code /// __m256i /// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) /// \endcode /// /// This intrinsic corresponds to the \c VPMADD52HUQ instruction. /// /// \return /// return __m256i dst. /// \param __X /// A 256-bit vector of [4 x i64] /// \param __Y /// A 256-bit vector of [4 x i64] /// \param __Z /// A 256-bit vector of [4 x i64] /// /// \code{.operation} /// FOR j := 0 to 3 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y /// and \a __Z to form a 104-bit intermediate result. 
Add the low 52-bit /// unsigned integer from the intermediate result with the corresponding /// unsigned 64-bit integer in \a __X, and store the results in \a dst. /// /// \headerfile /// /// \code /// __m128i /// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) /// \endcode /// /// This intrinsic corresponds to the \c VPMADD52LUQ instruction. /// /// \return /// return __m128i dst. /// \param __X /// A 128-bit vector of [2 x i64] /// \param __Y /// A 128-bit vector of [2 x i64] /// \param __Z /// A 128-bit vector of [2 x i64] /// /// \code{.operation} /// FOR j := 0 to 1 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, (__v2di)__Z); } /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit /// unsigned integer from the intermediate result with the corresponding /// unsigned 64-bit integer in \a __X, and store the results in \a dst. /// /// \headerfile /// /// \code /// __m256i /// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) /// \endcode /// /// This intrinsic corresponds to the \c VPMADD52LUQ instruction. /// /// \return /// return __m256i dst. 
/// \param __X /// A 256-bit vector of [4 x i64] /// \param __Y /// A 256-bit vector of [4 x i64] /// \param __Z /// A 256-bit vector of [4 x i64] /// /// \code{.operation} /// FOR j := 0 to 3 /// i := j*64 /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXIFMAINTRIN_H /*===---- avxintrin.h - AVX intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVXINTRIN_H #define __AVXINTRIN_H typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32))); typedef long long __v4di __attribute__ ((__vector_size__ (32))); typedef int __v8si __attribute__ ((__vector_size__ (32))); typedef short __v16hi __attribute__ ((__vector_size__ (32))); typedef char __v32qi __attribute__ ((__vector_size__ (32))); /* Unsigned types */ typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))); typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. 
*/ typedef signed char __v32qs __attribute__((__vector_size__(32))); typedef float __m256 __attribute__ ((__vector_size__ (32), __aligned__(32))); typedef double __m256d __attribute__((__vector_size__(32), __aligned__(32))); typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32))); typedef float __m256_u __attribute__ ((__vector_size__ (32), __aligned__(1))); typedef double __m256d_u __attribute__((__vector_size__(32), __aligned__(1))); typedef long long __m256i_u __attribute__((__vector_size__(32), __aligned__(1))); #ifdef __SSE2__ /* Both _Float16 and __bf16 require SSE2 being enabled. */ typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32))); typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32))); typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1))); typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32))); typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128))) /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the sums of both /// operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a+(__v4df)__b); } /// Adds two 256-bit vectors of [8 x float]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VADDPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the sums of both /// operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a+(__v8sf)__b); } /// Subtracts two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the minuend. /// \param __b /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the differences between /// both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a-(__v4df)__b); } /// Subtracts two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the minuend. /// \param __b /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the differences between /// both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a-(__v8sf)__b); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the left source operand. /// \param __b /// A 256-bit vector of [4 x double] containing the right source operand. 
/// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the left source operand. /// \param __b /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } /// Divides two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the dividend. /// \param __b /// A 256-bit vector of [4 x double] containing the divisor. /// \returns A 256-bit vector of [4 x double] containing the quotients of both /// operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a/(__v4df)__b); } /// Divides two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the dividend. /// \param __b /// A 256-bit vector of [8 x float] containing the divisor. /// \returns A 256-bit vector of [8 x float] containing the quotients of both /// operands. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a/(__v8sf)__b); } /// Compares two 256-bit vectors of [4 x double] and returns the greater /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the maximum values /// between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); } /// Compares two 256-bit vectors of [8 x float] and returns the greater /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the maximum values /// between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); } /// Compares two 256-bit vectors of [4 x double] and returns the lesser /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the minimum values /// between both operands. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); } /// Compares two 256-bit vectors of [8 x float] and returns the lesser /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the minimum values /// between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); } /// Multiplies two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the products of both /// operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a * (__v4df)__b); } /// Multiplies two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the products of both /// operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b) { return (__m256)((__v8sf)__a * (__v8sf)__b); } /// Calculates the square roots of the values in a 256-bit vector of /// [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPD instruction. 
/// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a) { return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); } /// Calculates the square roots of the values in a 256-bit vector of /// [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); } /// Calculates the reciprocal square roots of the values in a 256-bit /// vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRSQRTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the reciprocal square /// roots of the values in the operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); } /// Calculates the reciprocals of the values in a 256-bit vector of /// [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRCPPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the reciprocals of the /// values in the operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rcp_ps(__m256 __a) { return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); } /// Rounds the values in a 256-bit vector of [4 x double] as specified /// by the byte operand. The source values are rounded to integer values and /// returned as 64-bit double-precision floating-point values. 
/// /// \headerfile /// /// \code /// __m256d _mm256_round_pd(__m256d V, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used. \n /// 1: The PE field is not updated. \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M. \n /// 1: Use the current MXCSR setting. \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest. \n /// 01: Downward (toward negative infinity). \n /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [4 x double] containing the rounded values. #define _mm256_round_pd(V, M) \ ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))) /// Rounds the values stored in a 256-bit vector of [8 x float] as /// specified by the byte operand. The source values are rounded to integer /// values and returned as floating-point values. /// /// \headerfile /// /// \code /// __m256 _mm256_round_ps(__m256 V, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used. \n /// 1: The PE field is not updated. \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M. \n /// 1: Use the current MXCSR setting. \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest. \n /// 01: Downward (toward negative infinity). \n /// 10: Upward (toward positive infinity). \n /// 11: Truncated. /// \returns A 256-bit vector of [8 x float] containing the rounded values. 
#define _mm256_round_ps(V, M) \ ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))) /// Rounds up the values stored in a 256-bit vector of [4 x double]. The /// source values are rounded up to integer values and returned as 64-bit /// double-precision floating-point values. /// /// \headerfile /// /// \code /// __m256d _mm256_ceil_pd(__m256d V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the rounded up values. #define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) /// Rounds down the values stored in a 256-bit vector of [4 x double]. /// The source values are rounded down to integer values and returned as /// 64-bit double-precision floating-point values. /// /// \headerfile /// /// \code /// __m256d _mm256_floor_pd(__m256d V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the rounded down /// values. #define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) /// Rounds up the values stored in a 256-bit vector of [8 x float]. The /// source values are rounded up to integer values and returned as /// floating-point values. /// /// \headerfile /// /// \code /// __m256 _mm256_ceil_ps(__m256 V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the rounded up values. #define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) /// Rounds down the values stored in a 256-bit vector of [8 x float]. The /// source values are rounded down to integer values and returned as /// floating-point values. 
/// /// \headerfile /// /// \code /// __m256 _mm256_floor_ps(__m256 V); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the rounded down values. #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) /* Logical */ /// Performs a bitwise AND of two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VANDPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a & (__v4du)__b); } /// Performs a bitwise AND of two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VANDPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a & (__v8su)__b); } /// Performs a bitwise AND of two 256-bit vectors of [4 x double], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VANDNPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the left source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 256-bit vector of [4 x double] containing the right source operand. 
/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd(__m256d __a, __m256d __b) { return (__m256d)(~(__v4du)__a & (__v4du)__b); } /// Performs a bitwise AND of two 256-bit vectors of [8 x float], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VANDNPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the left source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps(__m256 __a, __m256 __b) { return (__m256)(~(__v8su)__a & (__v8su)__b); } /// Performs a bitwise OR of two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VORPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the /// values between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a | (__v4du)__b); } /// Performs a bitwise OR of two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VORPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. 
/// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the /// values between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a | (__v8su)__b); } /// Performs a bitwise XOR of two 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the /// values between both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a ^ (__v4du)__b); } /// Performs a bitwise XOR of two 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the /// values between both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a ^ (__v8su)__b); } /* Horizontal arithmetic */ /// Horizontally adds the adjacent pairs of values contained in two /// 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal sums of the values are returned in the even-indexed /// elements of a vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. 
/// The horizontal sums of the values are returned in the odd-indexed /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } /// Horizontally adds the adjacent pairs of values contained in two /// 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal sums of the values are returned in the elements with /// index 0, 1, 4, 5 of a vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal sums of the values are returned in the elements with /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hadd_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } /// Horizontally subtracts the adjacent pairs of values contained in two /// 256-bit vectors of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal differences between the values are returned in the /// even-indexed elements of a vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double] containing one of the source operands. /// The horizontal differences between the values are returned in the /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } /// Horizontally subtracts the adjacent pairs of values contained in two /// 256-bit vectors of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal differences between the values are returned in the /// elements with index 0, 1, 4, 5 of a vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float] containing one of the source operands. /// The horizontal differences between the values are returned in the /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } /* Vector permutations */ /// Copies the values in a 128-bit vector of [2 x double] as specified /// by the 128-bit integer vector operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __c /// A 128-bit integer vector operand specifying how the values are to be /// copied. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [65]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. 
static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } /// Copies the values in a 256-bit vector of [4 x double] as specified /// by the 256-bit integer vector operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __c /// A 256-bit integer vector operand specifying how the values are to be /// copied. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [65]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. \n /// Bit [129]: \n /// 0: Bits [191:128] of the source are copied to bits [191:128] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [191:128] of the /// returned vector. \n /// Bit [193]: \n /// 0: Bits [191:128] of the source are copied to bits [255:192] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); } /// Copies the values stored in a 128-bit vector of [4 x float] as /// specified by the 128-bit integer vector operand. /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __c /// A 128-bit integer vector operand specifying how the values are to be /// copied. 
\n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [33:32]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [65:64]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [97:96]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } /// Copies the values stored in a 256-bit vector of [8 x float] as /// specified by the 256-bit integer vector operand. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __c /// A 256-bit integer vector operand specifying how the values are to be /// copied. \n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [33:32]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [65:64]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [97:96]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. \n /// Bits [129:128]: \n /// 00: Bits [159:128] of the source are copied to bits [159:128] of the /// returned vector. 
\n /// 01: Bits [191:160] of the source are copied to bits [159:128] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [159:128] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [159:128] of the /// returned vector. \n /// Bits [161:160]: \n /// 00: Bits [159:128] of the source are copied to bits [191:160] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [191:160] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [191:160] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [191:160] of the /// returned vector. \n /// Bits [193:192]: \n /// 00: Bits [159:128] of the source are copied to bits [223:192] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [223:192] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [223:192] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [223:192] of the /// returned vector. \n /// Bits [225:224]: \n /// 00: Bits [159:128] of the source are copied to bits [255:224] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [255:224] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [255:224] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } /// Copies the values in a 128-bit vector of [2 x double] as specified /// by the immediate integer operand. 
/// /// \headerfile /// /// \code /// __m128d _mm_permute_pd(__m128d A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param A /// A 128-bit vector of [2 x double]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bit [0]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_permute_pd(A, C) \ ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))) /// Copies the values in a 256-bit vector of [4 x double] as specified by /// the immediate integer operand. /// /// \headerfile /// /// \code /// __m256d _mm256_permute_pd(__m256d A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPD instruction. /// /// \param A /// A 256-bit vector of [4 x double]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bit [0]: \n /// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned /// vector. \n /// 1: Bits [127:64] of the source are copied to bits [63:0] of the /// returned vector. \n /// Bit [1]: \n /// 0: Bits [63:0] of the source are copied to bits [127:64] of the /// returned vector. \n /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. \n /// Bit [2]: \n /// 0: Bits [191:128] of the source are copied to bits [191:128] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [191:128] of the /// returned vector. 
\n /// Bit [3]: \n /// 0: Bits [191:128] of the source are copied to bits [255:192] of the /// returned vector. \n /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_permute_pd(A, C) \ ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))) /// Copies the values in a 128-bit vector of [4 x float] as specified by /// the immediate integer operand. /// /// \headerfile /// /// \code /// __m128 _mm_permute_ps(__m128 A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param A /// A 128-bit vector of [4 x float]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [3:2]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [5:4]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. 
\n /// Bits [7:6]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_permute_ps(A, C) \ ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))) /// Copies the values in a 256-bit vector of [8 x float] as specified by /// the immediate integer operand. /// /// \headerfile /// /// \code /// __m256 _mm256_permute_ps(__m256 A, const int C); /// \endcode /// /// This intrinsic corresponds to the VPERMILPS instruction. /// /// \param A /// A 256-bit vector of [8 x float]. /// \param C /// An immediate integer operand specifying how the values are to be /// copied. \n /// Bits [1:0]: \n /// 00: Bits [31:0] of the source are copied to bits [31:0] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [31:0] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [31:0] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [31:0] of the /// returned vector. \n /// Bits [3:2]: \n /// 00: Bits [31:0] of the source are copied to bits [63:32] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [63:32] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [63:32] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [63:32] of the /// returned vector. \n /// Bits [5:4]: \n /// 00: Bits [31:0] of the source are copied to bits [95:64] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [95:64] of the /// returned vector. 
\n /// 10: Bits [95:64] of the source are copied to bits [95:64] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [95:64] of the /// returned vector. \n /// Bits [7:6]: \n /// 00: Bits [31:0] of the source are copied to bits [127:96] of the /// returned vector. \n /// 01: Bits [63:32] of the source are copied to bits [127:96] of the /// returned vector. \n /// 10: Bits [95:64] of the source are copied to bits [127:96] of the /// returned vector. \n /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. \n /// Bits [1:0]: \n /// 00: Bits [159:128] of the source are copied to bits [159:128] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [159:128] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [159:128] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [159:128] of the /// returned vector. \n /// Bits [3:2]: \n /// 00: Bits [159:128] of the source are copied to bits [191:160] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [191:160] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [191:160] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [191:160] of the /// returned vector. \n /// Bits [5:4]: \n /// 00: Bits [159:128] of the source are copied to bits [223:192] of the /// returned vector. \n /// 01: Bits [191:160] of the source are copied to bits [223:192] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [223:192] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [223:192] of the /// returned vector. \n /// Bits [7:6]: \n /// 00: Bits [159:128] of the source are copied to bits [255:224] of the /// returned vector. 
\n /// 01: Bits [191:160] of the source are copied to bits [255:224] of the /// returned vector. \n /// 10: Bits [223:192] of the source are copied to bits [255:224] of the /// returned vector. \n /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_permute_ps(A, C) \ ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. /// /// \headerfile /// /// \code /// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPERM2F128 instruction. /// /// \param V1 /// A 256-bit vector of [4 x double]. /// \param V2 /// A 256-bit vector of [4 x double. /// \param M /// An immediate integer operand specifying how the values are to be /// permuted. \n /// Bits [1:0]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// Bits [5:4]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. 
#define _mm256_permute2f128_pd(V1, V2, M) \ ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \ (__v4df)(__m256d)(V2), (int)(M))) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [8 x float], as specified by the immediate integer operand. /// /// \headerfile /// /// \code /// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPERM2F128 instruction. /// /// \param V1 /// A 256-bit vector of [8 x float]. /// \param V2 /// A 256-bit vector of [8 x float]. /// \param M /// An immediate integer operand specifying how the values are to be /// permuted. \n /// Bits [1:0]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// Bits [5:4]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_permute2f128_ps(V1, V2, M) \ ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (int)(M))) /// Permutes 128-bit data values stored in two 256-bit integer vectors, /// as specified by the immediate integer operand. /// /// \headerfile /// /// \code /// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPERM2F128 instruction. 
/// /// \param V1 /// A 256-bit integer vector. /// \param V2 /// A 256-bit integer vector. /// \param M /// An immediate integer operand specifying how the values are to be copied. /// Bits [1:0]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the /// destination. \n /// Bits [5:4]: \n /// 00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the /// destination. \n /// 10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the /// destination. \n /// 11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the /// destination. /// \returns A 256-bit integer vector containing the copied values. #define _mm256_permute2f128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), (int)(M))) /* Vector Blend */ /// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the immediate /// integer operand. /// /// \headerfile /// /// \code /// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPD instruction. /// /// \param V1 /// A 256-bit vector of [4 x double]. /// \param V2 /// A 256-bit vector of [4 x double]. /// \param M /// An immediate integer operand, with mask bits [3:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 64-bit /// element in operand \a V1 is copied to the same position in the /// destination. 
When a mask bit is 1, the corresponding 64-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_blend_pd(V1, V2, M) \ ((__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \ (__v4df)(__m256d)(V2), (int)(M))) /// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the immediate /// integer operand. /// /// \headerfile /// /// \code /// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPS instruction. /// /// \param V1 /// A 256-bit vector of [8 x float]. /// \param V2 /// A 256-bit vector of [8 x float]. /// \param M /// An immediate integer operand, with mask bits [7:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 32-bit /// element in operand \a V1 is copied to the same position in the /// destination. When a mask bit is 1, the corresponding 32-bit element in /// operand \a V2 is copied to the same position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_blend_ps(V1, V2, M) \ ((__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (int)(M))) /// Merges 64-bit double-precision data values stored in either of the /// two 256-bit vectors of [4 x double], as specified by the 256-bit vector /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \param __c /// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying /// how the values are to be copied. 
The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 64-bit element in operand \a __a is copied to the same /// position in the destination. When a mask bit is 1, the corresponding /// 64-bit element in operand \a __b is copied to the same position in the /// destination. /// \returns A 256-bit vector of [4 x double] containing the copied values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) { return (__m256d)__builtin_ia32_blendvpd256( (__v4df)__a, (__v4df)__b, (__v4df)__c); } /// Merges 32-bit single-precision data values stored in either of the /// two 256-bit vectors of [8 x float], as specified by the 256-bit vector /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \param __c /// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63, /// and 31 specifying how the values are to be copied. The position of the /// mask bit corresponds to the most significant bit of a copied value. When /// a mask bit is 0, the corresponding 32-bit element in operand \a __a is /// copied to the same position in the destination. When a mask bit is 1, the /// corresponding 32-bit element in operand \a __b is copied to the same /// position in the destination. /// \returns A 256-bit vector of [8 x float] containing the copied values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) { return (__m256)__builtin_ia32_blendvps256( (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); } /* Vector Dot Product */ /// Computes two dot products in parallel, using the lower and upper /// halves of two [8 x float] vectors as input to the two computations, and /// returning the two dot products in the lower and upper halves of the /// [8 x float] result. 
/// /// The immediate integer operand controls which input elements will /// contribute to the dot product, and where the final results are returned. /// In general, for each dot product, the four corresponding elements of the /// input vectors are multiplied; the first two and second two products are /// summed, then the two sums are added to form the final result. /// /// \headerfile /// /// \code /// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VDPPS instruction. /// /// \param V1 /// A vector of [8 x float] values, treated as two [4 x float] vectors. /// \param V2 /// A vector of [8 x float] values, treated as two [4 x float] vectors. /// \param M /// An immediate integer argument. Bits [7:4] determine which elements of /// the input vectors are used, with bit [4] corresponding to the lowest /// element and bit [7] corresponding to the highest element of each [4 x /// float] subvector. If a bit is set, the corresponding elements from the /// two input vectors are used as an input for dot product; otherwise that /// input is treated as zero. Bits [3:0] determine which elements of the /// result will receive a copy of the final dot product, with bit [0] /// corresponding to the lowest element and bit [3] corresponding to the /// highest element of each [4 x float] subvector. If a bit is set, the dot /// product is returned in the corresponding element; otherwise that element /// is set to zero. The bitmask is applied in the same way to each of the /// two parallel dot product computations. /// \returns A 256-bit vector of [8 x float] containing the two dot products. #define _mm256_dp_ps(V1, V2, M) \ ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (M))) /* Vector shuffle */ /// Selects 8 float values from the 256-bit operands of [8 x float], as /// specified by the immediate value operand. 
/// /// The four selected elements in each operand are copied to the destination /// according to the bits specified in the immediate operand. The selected /// elements from the first 256-bit operand are copied to bits [63:0] and /// bits [191:128] of the destination, and the selected elements from the /// second 256-bit operand are copied to bits [127:64] and bits [255:192] of /// the destination. For example, if bits [7:0] of the immediate operand /// contain a value of 0xFF, the 256-bit destination vector would contain the /// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3]. /// /// \headerfile /// /// \code /// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask); /// \endcode /// /// This intrinsic corresponds to the VSHUFPS instruction. /// /// \param a /// A 256-bit vector of [8 x float]. The four selected elements in this /// operand are copied to bits [63:0] and bits [191:128] in the destination, /// according to the bits specified in the immediate operand. /// \param b /// A 256-bit vector of [8 x float]. The four selected elements in this /// operand are copied to bits [127:64] and bits [255:192] in the /// destination, according to the bits specified in the immediate operand. /// \param mask /// An immediate value containing an 8-bit value specifying which elements to /// copy from \a a and \a b \n. /// Bits [3:0] specify the values copied from operand \a a. \n /// Bits [7:4] specify the values copied from operand \a b. \n /// The destinations within the 256-bit destination are assigned values as /// follows, according to the bit value assignments described below: \n /// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the /// destination. \n /// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the /// destination. \n /// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the /// destination. 
\n /// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in /// the destination. \n /// Bit value assignments: \n /// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n /// 01: Bits [63:32] and [191:160] are copied from the selected operand. \n /// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n /// 11: Bits [127:96] and [255:224] are copied from the selected operand. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) \ ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (int)(mask))) /// Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. /// /// The selected elements from the first 256-bit operand are copied to bits /// [63:0] and bits [191:128] in the destination, and the selected elements /// from the second 256-bit operand are copied to bits [127:64] and bits /// [255:192] in the destination. For example, if bits [3:0] of the immediate /// operand contain a value of 0xF, the 256-bit destination vector would /// contain the following values: b[3], a[3], b[1], a[1]. /// /// \headerfile /// /// \code /// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask); /// \endcode /// /// This intrinsic corresponds to the VSHUFPD instruction. /// /// \param a /// A 256-bit vector of [4 x double]. /// \param b /// A 256-bit vector of [4 x double]. /// \param mask /// An immediate value containing 8-bit values specifying which elements to /// copy from \a a and \a b: \n /// Bit [0]=0: Bits [63:0] are copied from \a a to bits [63:0] of the /// destination. \n /// Bit [0]=1: Bits [127:64] are copied from \a a to bits [63:0] of the /// destination. 
\n /// Bit [1]=0: Bits [63:0] are copied from \a b to bits [127:64] of the /// destination. \n /// Bit [1]=1: Bits [127:64] are copied from \a b to bits [127:64] of the /// destination. \n /// Bit [2]=0: Bits [191:128] are copied from \a a to bits [191:128] of the /// destination. \n /// Bit [2]=1: Bits [255:192] are copied from \a a to bits [191:128] of the /// destination. \n /// Bit [3]=0: Bits [191:128] are copied from \a b to bits [255:192] of the /// destination. \n /// Bit [3]=1: Bits [255:192] are copied from \a b to bits [255:192] of the /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. #define _mm256_shuffle_pd(a, b, mask) \ ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (int)(mask))) /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ #define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ #define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ #define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ #define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ #define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ #define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ #define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ #define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ #define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ #define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ #define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ #define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ #define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ #define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ #define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ #define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ #define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ #define _CMP_LE_OQ 
0x12 /* Less-than-or-equal (ordered, non-signaling) */ #define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ #define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ #define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ #define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */ #define _CMP_ORD_S 0x17 /* Ordered (signaling) */ #define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ #define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */ #define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ #define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ #define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ #define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ #define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ #define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ /// Compares each of the corresponding double-precision values of two /// 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. /// /// Returns a [2 x double] vector consisting of two doubles corresponding to /// the two comparison results: zero if the comparison is false, and all 1's /// if the comparison is true. /// /// \headerfile /// /// \code /// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. /// \param b /// A 128-bit vector of [2 x double]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. 
#define _mm_cmp_pd(a, b, c) \ ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (c))) /// Compares each of the corresponding values of two 128-bit vectors of /// [4 x float], using the operation specified by the immediate integer /// operand. /// /// Returns a [4 x float] vector consisting of four floats corresponding to /// the four comparison results: zero if the comparison is false, and all 1's /// if the comparison is true. /// /// \headerfile /// /// \code /// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n 
/// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. #define _mm_cmp_ps(a, b, c) \ ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (c))) /// Compares each of the corresponding double-precision values of two /// 256-bit vectors of [4 x double], using the operation specified by the /// immediate integer operand. /// /// Returns a [4 x double] vector consisting of four doubles corresponding to /// the four comparison results: zero if the comparison is false, and all 1's /// if the comparison is true. /// /// \headerfile /// /// \code /// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPD instruction. /// /// \param a /// A 256-bit vector of [4 x double]. /// \param b /// A 256-bit vector of [4 x double]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [4 x double] containing the comparison results. 
#define _mm256_cmp_pd(a, b, c) \ ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \ (__v4df)(__m256d)(b), (c))) /// Compares each of the corresponding values of two 256-bit vectors of /// [8 x float], using the operation specified by the immediate integer /// operand. /// /// Returns a [8 x float] vector consisting of eight floats corresponding to /// the eight comparison results: zero if the comparison is false, and all /// 1's if the comparison is true. /// /// \headerfile /// /// \code /// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPPS instruction. /// /// \param a /// A 256-bit vector of [8 x float]. /// \param b /// A 256-bit vector of [8 x float]. /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered 
(signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 256-bit vector of [8 x float] containing the comparison results. #define _mm256_cmp_ps(a, b, c) \ ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \ (__v8sf)(__m256)(b), (c))) /// Compares each of the corresponding scalar double-precision values of /// two 128-bit vectors of [2 x double], using the operation specified by the /// immediate integer operand. /// /// If the result is true, all 64 bits of the destination vector are set; /// otherwise they are cleared. /// /// \headerfile /// /// \code /// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPSD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. /// \param b /// A 128-bit vector of [2 x double]. 
/// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [2 x double] containing the comparison results. 
#define _mm_cmp_sd(a, b, c) \ ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \ (__v2df)(__m128d)(b), (c))) /// Compares each of the corresponding scalar values of two 128-bit /// vectors of [4 x float], using the operation specified by the immediate /// integer operand. /// /// If the result is true, all 32 bits of the destination vector are set; /// otherwise they are cleared. /// /// \headerfile /// /// \code /// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c); /// \endcode /// /// This intrinsic corresponds to the VCMPSS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. /// \param c /// An immediate integer operand, with bits [4:0] specifying which comparison /// operation to use: \n /// 0x00: Equal (ordered, non-signaling) \n /// 0x01: Less-than (ordered, signaling) \n /// 0x02: Less-than-or-equal (ordered, signaling) \n /// 0x03: Unordered (non-signaling) \n /// 0x04: Not-equal (unordered, non-signaling) \n /// 0x05: Not-less-than (unordered, signaling) \n /// 0x06: Not-less-than-or-equal (unordered, signaling) \n /// 0x07: Ordered (non-signaling) \n /// 0x08: Equal (unordered, non-signaling) \n /// 0x09: Not-greater-than-or-equal (unordered, signaling) \n /// 0x0A: Not-greater-than (unordered, signaling) \n /// 0x0B: False (ordered, non-signaling) \n /// 0x0C: Not-equal (ordered, non-signaling) \n /// 0x0D: Greater-than-or-equal (ordered, signaling) \n /// 0x0E: Greater-than (ordered, signaling) \n /// 0x0F: True (unordered, non-signaling) \n /// 0x10: Equal (ordered, signaling) \n /// 0x11: Less-than (ordered, non-signaling) \n /// 0x12: Less-than-or-equal (ordered, non-signaling) \n /// 0x13: Unordered (signaling) \n /// 0x14: Not-equal (unordered, signaling) \n /// 0x15: Not-less-than (unordered, non-signaling) \n /// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n /// 0x17: Ordered (signaling) \n /// 0x18: Equal (unordered, signaling) \n /// 0x19: 
Not-greater-than-or-equal (unordered, non-signaling) \n /// 0x1A: Not-greater-than (unordered, non-signaling) \n /// 0x1B: False (ordered, signaling) \n /// 0x1C: Not-equal (ordered, signaling) \n /// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n /// 0x1E: Greater-than (ordered, non-signaling) \n /// 0x1F: True (unordered, signaling) /// \returns A 128-bit vector of [4 x float] containing the comparison results. #define _mm_cmp_ss(a, b, c) \ ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \ (__v4sf)(__m128)(b), (c))) /// Takes a [8 x i32] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// int _mm256_extract_epi32(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit vector of [8 x i32]. /// \param N /// An immediate integer operand with bits [2:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 32 bits of extended /// packed data. #define _mm256_extract_epi32(X, N) \ ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))) /// Takes a [16 x i16] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// int _mm256_extract_epi16(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit integer vector of [16 x i16]. /// \param N /// An immediate integer operand with bits [3:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 16 bits of zero extended /// packed data. 
#define _mm256_extract_epi16(X, N) \ ((int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \ (int)(N))) /// Takes a [32 x i8] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// int _mm256_extract_epi8(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit integer vector of [32 x i8]. /// \param N /// An immediate integer operand with bits [4:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 8 bits of zero extended /// packed data. #define _mm256_extract_epi8(X, N) \ ((int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \ (int)(N))) #ifdef __x86_64__ /// Takes a [4 x i64] vector and returns the vector element value /// indexed by the immediate constant operand. /// /// \headerfile /// /// \code /// long long _mm256_extract_epi64(__m256i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// /// \param X /// A 256-bit integer vector of [4 x i64]. /// \param N /// An immediate integer operand with bits [1:0] determining which vector /// element is extracted and returned. /// \returns A 64-bit integer containing the extracted 64 bits of extended /// packed data. #define _mm256_extract_epi64(X, N) \ ((long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))) #endif /// Takes a [8 x i32] vector and replaces the vector element value /// indexed by the immediate constant operand by a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi32(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [8 x i32] to be used by the insert operation. 
/// \param I /// An integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. #define _mm256_insert_epi32(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \ (int)(I), (int)(N))) /// Takes a [16 x i16] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi16(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [16 x i16] to be used by the insert operation. /// \param I /// An i16 integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. #define _mm256_insert_epi16(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \ (int)(I), (int)(N))) /// Takes a [32 x i8] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi8(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [32 x i8] to be used by the insert operation. /// \param I /// An i8 integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. 
#define _mm256_insert_epi8(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \ (int)(I), (int)(N))) #ifdef __x86_64__ /// Takes a [4 x i64] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the /// modified vector. /// /// \headerfile /// /// \code /// __m256i _mm256_insert_epi64(__m256i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// /// \param X /// A vector of [4 x i64] to be used by the insert operation. /// \param I /// A 64-bit integer value. The replacement value for the insert operation. /// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. /// \returns A copy of vector \a X, after replacing its element indexed by /// \a N with \a I. #define _mm256_insert_epi64(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \ (long long)(I), (int)(N))) #endif /* Conversion */ /// Converts a vector of [4 x i32] into a vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PD instruction. /// /// \param __a /// A 128-bit integer vector of [4 x i32]. /// \returns A 256-bit vector of [4 x double] containing the converted values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a) { return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } /// Converts a vector of [8 x i32] into a vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PS instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit vector of [8 x float] containing the converted values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a) { return (__m256)__builtin_convertvector((__v8si)__a, __v8sf); } /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of /// [4 x float]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2PS instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit vector of [4 x float] containing the converted values. static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a) { return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); } /// Converts a vector of [8 x float] into a vector of [8 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2DQ instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit integer vector containing the converted values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a) { return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); } /// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 /// x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2PD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit vector of [4 x double] containing the converted values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a) { return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df); } /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 /// x i32], truncating the result by rounding towards zero when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPD2DQ instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit integer vector containing the converted values. static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a) { return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); } /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 /// x i32]. When a conversion is inexact, the value returned is rounded /// according to the rounding control bits in the MXCSR register. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2DQ instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 128-bit integer vector containing the converted values. static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a) { return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); } /// Converts a vector of [8 x float] into a vector of [8 x i32], /// truncating the result by rounding towards zero when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPS2DQ instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 256-bit integer vector containing the converted values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a) { return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); } /// Returns the first element of the input vector of [4 x double]. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 64 bit double containing the first element of the input vector. static __inline double __DEFAULT_FN_ATTRS _mm256_cvtsd_f64(__m256d __a) { return __a[0]; } /// Returns the first element of the input vector of [8 x i32]. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 32 bit integer containing the first element of the input vector. static __inline int __DEFAULT_FN_ATTRS _mm256_cvtsi256_si32(__m256i __a) { __v8si __b = (__v8si)__a; return __b[0]; } /// Returns the first element of the input vector of [8 x float]. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. 
/// \returns A 32 bit float containing the first element of the input vector. static __inline float __DEFAULT_FN_ATTRS _mm256_cvtss_f32(__m256 __a) { return __a[0]; } /* Vector replicate */ /// Moves and duplicates odd-indexed values from a 256-bit vector of /// [8 x float] to float values in a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSHDUP instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of /// the return value. \n /// Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of /// the return value. \n /// Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the /// return value. \n /// Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7); } /// Moves and duplicates even-indexed values from a 256-bit vector of /// [8 x float] to float values in a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSLDUP instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of /// the return value. \n /// Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of /// the return value. \n /// Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the /// return value. \n /// Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the /// return value. /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated /// values. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6); } /// Moves and duplicates double-precision floating point values from a /// 256-bit vector of [4 x double] to double-precision values in a 256-bit /// vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. \n /// Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the /// return value. \n /// Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of /// the return value. /// \returns A 256-bit vector of [4 x double] containing the moved and /// duplicated values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2); } /* Unpack and Interleave */ /// Unpacks the odd-indexed vector elements from two 256-bit vectors of /// [4 x double] and interleaves them into a 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPD instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [127:64] are written to bits [63:0] of the return value. \n /// Bits [255:192] are written to bits [191:128] of the return value. \n /// \param __b /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [127:64] are written to bits [127:64] of the return value. \n /// Bits [255:192] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } /// Unpacks the even-indexed vector elements from two 256-bit vectors of /// [4 x double] and interleaves them into a 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [63:0] are written to bits [63:0] of the return value. \n /// Bits [191:128] are written to bits [191:128] of the return value. /// \param __b /// A 256-bit floating-point vector of [4 x double]. \n /// Bits [63:0] are written to bits [127:64] of the return value. \n /// Bits [191:128] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } /// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the /// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit /// vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [95:64] are written to bits [31:0] of the return value. \n /// Bits [127:96] are written to bits [95:64] of the return value. \n /// Bits [223:192] are written to bits [159:128] of the return value. \n /// Bits [255:224] are written to bits [223:192] of the return value. /// \param __b /// A 256-bit vector of [8 x float]. \n /// Bits [95:64] are written to bits [63:32] of the return value. \n /// Bits [127:96] are written to bits [127:96] of the return value. \n /// Bits [223:192] are written to bits [191:160] of the return value. 
\n /// Bits [255:224] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } /// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the /// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit /// vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. \n /// Bits [31:0] are written to bits [31:0] of the return value. \n /// Bits [63:32] are written to bits [95:64] of the return value. \n /// Bits [159:128] are written to bits [159:128] of the return value. \n /// Bits [191:160] are written to bits [223:192] of the return value. /// \param __b /// A 256-bit vector of [8 x float]. \n /// Bits [31:0] are written to bits [63:32] of the return value. \n /// Bits [63:32] are written to bits [127:96] of the return value. \n /// Bits [159:128] are written to bits [191:160] of the return value. \n /// Bits [191:160] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } /* Bit Test */ /// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. 
Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } /// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. 
static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } /// Given two 128-bit floating-point vectors of [2 x double], perform an /// element-by-element comparison of the double-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } /// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. 
Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } /// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } /// Given two 128-bit floating-point vectors of [4 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. 
Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } /// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the ZF flag. 
static __inline int __DEFAULT_FN_ATTRS _mm256_testz_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } /// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } /// Given two 256-bit floating-point vectors of [4 x double], perform an /// element-by-element comparison of the double-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of double-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of double-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. 
\n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double]. /// \param __b /// A 256-bit vector of [4 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } /// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the ZF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testz_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } /// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision element in the /// first source vector and the corresponding element in the second source /// vector. 
/// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the CF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testc_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } /// Given two 256-bit floating-point vectors of [8 x float], perform an /// element-by-element comparison of the single-precision elements in the /// first source vector and the corresponding elements in the second source /// vector. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of single-precision elements where the /// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the /// ZF flag is set to 1. \n /// If there is at least one pair of single-precision elements where the /// sign-bit of the first element is 0 and the sign-bit of the second element /// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VTESTPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float]. /// \param __b /// A 256-bit vector of [8 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. 
static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_ps(__m256 __a, __m256 __b) { return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } /// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of bits where both bits are 1, the ZF flag /// is set to 0. Otherwise the ZF flag is set to 1. \n /// If there is at least one pair of bits where the bit from the first source /// vector is 0 and the bit from the second source vector is 1, the CF flag /// is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the ZF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns the ZF flag. static __inline int __DEFAULT_FN_ATTRS _mm256_testz_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } /// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of bits where both bits are 1, the ZF flag /// is set to 0. Otherwise the ZF flag is set to 1. \n /// If there is at least one pair of bits where the bit from the first source /// vector is 0 and the bit from the second source vector is 1, the CF flag /// is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns the value of the CF flag. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns the CF flag. 
static __inline int __DEFAULT_FN_ATTRS _mm256_testc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } /// Given two 256-bit integer vectors, perform a bit-by-bit comparison /// of the two source vectors. /// /// The EFLAGS register is updated as follows: \n /// If there is at least one pair of bits where both bits are 1, the ZF flag /// is set to 0. Otherwise the ZF flag is set to 1. \n /// If there is at least one pair of bits where the bit from the first source /// vector is 0 and the bit from the second source vector is 1, the CF flag /// is set to 0. Otherwise the CF flag is set to 1. \n /// This intrinsic returns 1 if both the ZF and CF flags are set to 0, /// otherwise it returns 0. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST instruction. /// /// \param __a /// A 256-bit integer vector. /// \param __b /// A 256-bit integer vector. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); } /* Vector extract sign mask */ /// Extracts the sign bits of double-precision floating point elements /// in a 256-bit vector of [4 x double] and writes them to the lower order /// bits of the return value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPD instruction. /// /// \param __a /// A 256-bit vector of [4 x double] containing the double-precision /// floating point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [3:0]. static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); } /// Extracts the sign bits of single-precision floating point elements /// in a 256-bit vector of [8 x float] and writes them to the lower order /// bits of the return value. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPS instruction. /// /// \param __a /// A 256-bit vector of [8 x float] containing the single-precision floating /// point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [7:0]. static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); } /* Vector __zero */ /// Zeroes the contents of all XMM or YMM registers. /// /// \headerfile /// /// This intrinsic corresponds to the VZEROALL instruction. static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroall(void) { __builtin_ia32_vzeroall(); } /// Zeroes the upper 128 bits (bits 255:128) of all YMM registers. /// /// \headerfile /// /// This intrinsic corresponds to the VZEROUPPER instruction. static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroupper(void) { __builtin_ia32_vzeroupper(); } /* Vector load with broadcast */ /// Loads a scalar single-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [4 x float] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSS instruction. /// /// \param __a /// The single-precision floating point value to be broadcast. /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set /// equal to the broadcast value. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_broadcast_ss(float const *__a) { struct __mm_broadcast_ss_struct { float __f; } __attribute__((__packed__, __may_alias__)); float __f = ((const struct __mm_broadcast_ss_struct*)__a)->__f; return __extension__ (__m128){ __f, __f, __f, __f }; } /// Loads a scalar double-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [4 x double] vector. 
/// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSD instruction. /// /// \param __a /// The double-precision floating point value to be broadcast. /// \returns A 256-bit vector of [4 x double] whose 64-bit elements are set /// equal to the broadcast value. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd(double const *__a) { struct __mm256_broadcast_sd_struct { double __d; } __attribute__((__packed__, __may_alias__)); double __d = ((const struct __mm256_broadcast_sd_struct*)__a)->__d; return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d }; } /// Loads a scalar single-precision floating point value from the /// specified address pointed to by \a __a and broadcasts it to the elements /// of a [8 x float] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSS instruction. /// /// \param __a /// The single-precision floating point value to be broadcast. /// \returns A 256-bit vector of [8 x float] whose 32-bit elements are set /// equal to the broadcast value. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss(float const *__a) { struct __mm256_broadcast_ss_struct { float __f; } __attribute__((__packed__, __may_alias__)); float __f = ((const struct __mm256_broadcast_ss_struct*)__a)->__f; return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f }; } /// Loads the data from a 128-bit vector of [2 x double] from the /// specified address pointed to by \a __a and broadcasts it to 128-bit /// elements in a 256-bit vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTF128 instruction. /// /// \param __a /// The 128-bit vector of [2 x double] to be broadcast. /// \returns A 256-bit vector of [4 x double] whose 128-bit elements are set /// equal to the broadcast value. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd(__m128d const *__a) { __m128d __b = _mm_loadu_pd((const double *)__a); return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b, 0, 1, 0, 1); } /// Loads the data from a 128-bit vector of [4 x float] from the /// specified address pointed to by \a __a and broadcasts it to 128-bit /// elements in a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTF128 instruction. /// /// \param __a /// The 128-bit vector of [4 x float] to be broadcast. /// \returns A 256-bit vector of [8 x float] whose 128-bit elements are set /// equal to the broadcast value. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps(__m128 const *__a) { __m128 __b = _mm_loadu_ps((const float *)__a); return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b, 0, 1, 2, 3, 0, 1, 2, 3); } /* SIMD load ops */ /// Loads 4 double-precision floating point values from a 32-byte aligned /// memory location pointed to by \a __p into a vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD instruction. /// /// \param __p /// A 32-byte aligned pointer to a memory location containing /// double-precision floating point values. /// \returns A 256-bit vector of [4 x double] containing the moved values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd(double const *__p) { return *(const __m256d *)__p; } /// Loads 8 single-precision floating point values from a 32-byte aligned /// memory location pointed to by \a __p into a vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS instruction. /// /// \param __p /// A 32-byte aligned pointer to a memory location containing float values. /// \returns A 256-bit vector of [8 x float] containing the moved values. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps(float const *__p) { return *(const __m256 *)__p; } /// Loads 4 double-precision floating point values from an unaligned /// memory location pointed to by \a __p into a vector of [4 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD instruction. /// /// \param __p /// A pointer to a memory location containing double-precision floating /// point values. /// \returns A 256-bit vector of [4 x double] containing the moved values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd(double const *__p) { struct __loadu_pd { __m256d_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_pd*)__p)->__v; } /// Loads 8 single-precision floating point values from an unaligned /// memory location pointed to by \a __p into a vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS instruction. /// /// \param __p /// A pointer to a memory location containing single-precision floating /// point values. /// \returns A 256-bit vector of [8 x float] containing the moved values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps(float const *__p) { struct __loadu_ps { __m256_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ps*)__p)->__v; } /// Loads 256 bits of integer data from a 32-byte aligned memory /// location pointed to by \a __p into elements of a 256-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQA instruction. /// /// \param __p /// A 32-byte aligned pointer to a 256-bit integer vector containing integer /// values. /// \returns A 256-bit integer vector containing the moved values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_load_si256(__m256i const *__p) { return *__p; } /// Loads 256 bits of integer data from an unaligned memory location /// pointed to by \a __p into a 256-bit integer vector. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQU instruction. /// /// \param __p /// A pointer to a 256-bit integer vector containing integer values. /// \returns A 256-bit integer vector containing the moved values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu_si256(__m256i_u const *__p) { struct __loadu_si256 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_si256*)__p)->__v; } /// Loads 256 bits of integer data from an unaligned memory location /// pointed to by \a __p into a 256-bit integer vector. This intrinsic may /// perform better than \c _mm256_loadu_si256 when the data crosses a cache /// line boundary. /// /// \headerfile /// /// This intrinsic corresponds to the VLDDQU instruction. /// /// \param __p /// A pointer to a 256-bit integer vector containing integer values. /// \returns A 256-bit integer vector containing the moved values. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_lddqu_si256(__m256i_u const *__p) { return (__m256i)__builtin_ia32_lddqu256((char const *)__p); } /* SIMD store ops */ /// Stores double-precision floating point values from a 256-bit vector /// of [4 x double] to a 32-byte aligned memory location pointed to by /// \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD instruction. /// /// \param __p /// A 32-byte aligned pointer to a memory location that will receive the /// double-precision floaing point values. /// \param __a /// A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_store_pd(double *__p, __m256d __a) { *(__m256d *)__p = __a; } /// Stores single-precision floating point values from a 256-bit vector /// of [8 x float] to a 32-byte aligned memory location pointed to by \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS instruction. 
/// /// \param __p /// A 32-byte aligned pointer to a memory location that will receive the /// float values. /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_store_ps(float *__p, __m256 __a) { *(__m256 *)__p = __a; } /// Stores double-precision floating point values from a 256-bit vector /// of [4 x double] to an unaligned memory location pointed to by \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD instruction. /// /// \param __p /// A pointer to a memory location that will receive the double-precision /// floating point values. /// \param __a /// A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_pd(double *__p, __m256d __a) { struct __storeu_pd { __m256d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd*)__p)->__v = __a; } /// Stores single-precision floating point values from a 256-bit vector /// of [8 x float] to an unaligned memory location pointed to by \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_ps(float *__p, __m256 __a) { struct __storeu_ps { __m256_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__p)->__v = __a; } /// Stores integer values from a 256-bit integer vector to a 32-byte /// aligned memory location pointed to by \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQA instruction. /// /// \param __p /// A 32-byte aligned pointer to a memory location that will receive the /// integer values. /// \param __a /// A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS _mm256_store_si256(__m256i *__p, __m256i __a) { *__p = __a; } /// Stores integer values from a 256-bit integer vector to an unaligned /// memory location pointed to by \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQU instruction. /// /// \param __p /// A pointer to a memory location that will receive the integer values. /// \param __a /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_si256(__m256i_u *__p, __m256i __a) { struct __storeu_si256 { __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si256*)__p)->__v = __a; } /* Conditional load ops */ /// Conditionally loads double-precision floating point elements from a /// memory location pointed to by \a __p into a 128-bit vector of /// [2 x double], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. /// /// \param __p /// A pointer to a memory location that contains the double-precision /// floating point values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each data element represents the mask bits. If a mask bit is zero, the /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [2 x double] containing the loaded values. static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_maskload_pd(double const *__p, __m128i __m) { return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m); } /// Conditionally loads double-precision floating point elements from a /// memory location pointed to by \a __p into a 256-bit vector of /// [4 x double], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. 
/// /// \param __p /// A pointer to a memory location that contains the double-precision /// floating point values. /// \param __m /// A 256-bit integer vector of [4 x quadword] containing the mask. The most /// significant bit of each quadword element represents the mask bits. If a /// mask bit is zero, the corresponding value in the memory location is not /// loaded and the corresponding field in the return value is set to zero. /// \returns A 256-bit vector of [4 x double] containing the loaded values. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_maskload_pd(double const *__p, __m256i __m) { return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p, (__v4di)__m); } /// Conditionally loads single-precision floating point elements from a /// memory location pointed to by \a __p into a 128-bit vector of /// [4 x float], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that contains the single-precision /// floating point values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each data element represents the mask bits. If a mask bit is zero, the /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [4 x float] containing the loaded values. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_maskload_ps(float const *__p, __m128i __m) { return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m); } /// Conditionally loads single-precision floating point elements from a /// memory location pointed to by \a __p into a 256-bit vector of /// [8 x float], depending on the mask bits associated with each data /// element. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. 
/// /// \param __p /// A pointer to a memory location that contains the single-precision /// floating point values. /// \param __m /// A 256-bit integer vector of [8 x dword] containing the mask. The most /// significant bit of each dword element represents the mask bits. If a mask /// bit is zero, the corresponding value in the memory location is not loaded /// and the corresponding field in the return value is set to zero. /// \returns A 256-bit vector of [8 x float] containing the loaded values. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_maskload_ps(float const *__p, __m256i __m) { return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m); } /* Conditional store ops */ /// Moves single-precision floating point values from a 256-bit vector /// of [8 x float] to a memory location pointed to by \a __p, according to /// the specified mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __m /// A 256-bit integer vector of [8 x dword] containing the mask. The most /// significant bit of each dword element in the mask vector represents the /// mask bits. If a mask bit is zero, the corresponding value from vector /// \a __a is not stored and the corresponding field in the memory location /// pointed to by \a __p is not changed. /// \param __a /// A 256-bit vector of [8 x float] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a) { __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a); } /// Moves double-precision values from a 128-bit vector of [2 x double] /// to a memory location pointed to by \a __p, according to the specified /// mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. 
/// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each field in the mask vector represents the mask bits. If a mask bit is /// zero, the corresponding value from vector \a __a is not stored and the /// corresponding field in the memory location pointed to by \a __p is not /// changed. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a) { __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a); } /// Moves double-precision values from a 256-bit vector of [4 x double] /// to a memory location pointed to by \a __p, according to the specified /// mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPD instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __m /// A 256-bit integer vector of [4 x quadword] containing the mask. The most /// significant bit of each quadword element in the mask vector represents /// the mask bits. If a mask bit is zero, the corresponding value from vector /// __a is not stored and the corresponding field in the memory location /// pointed to by \a __p is not changed. /// \param __a /// A 256-bit vector of [4 x double] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a) { __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a); } /// Moves single-precision floating point values from a 128-bit vector /// of [4 x float] to a memory location pointed to by \a __p, according to /// the specified mask. /// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. 
/// \param __m /// A 128-bit integer vector containing the mask. The most significant bit of /// each field in the mask vector represents the mask bits. If a mask bit is /// zero, the corresponding value from vector __a is not stored and the /// corresponding field in the memory location pointed to by \a __p is not /// changed. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) { __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a); } /* Cacheability support ops */ /// Moves integer data from a 256-bit integer vector to a 32-byte /// aligned memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTDQ instruction. /// /// \param __a /// A pointer to a 32-byte aligned memory location that will receive the /// integer values. /// \param __b /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256(__m256i *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); } /// Moves double-precision values from a 256-bit vector of [4 x double] /// to a 32-byte aligned memory location. To minimize caching, the data is /// flagged as non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPD instruction. /// /// \param __a /// A pointer to a 32-byte aligned memory location that will receive the /// double-precision floating-point values. /// \param __b /// A 256-bit vector of [4 x double] containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd(double *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); } /// Moves single-precision floating point values from a 256-bit vector /// of [8 x float] to a 32-byte aligned memory location. To minimize /// caching, the data is flagged as non-temporal (unlikely to be used again /// soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS instruction. /// /// \param __p /// A pointer to a 32-byte aligned memory location that will receive the /// single-precision floating point values. /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps(float *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); } /* Create vectors */ /// Create a 256-bit vector of [4 x double] with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [4 x double] containing undefined values. static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void) { return (__m256d)__builtin_ia32_undef256(); } /// Create a 256-bit vector of [8 x float] with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit vector of [8 x float] containing undefined values. static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) { return (__m256)__builtin_ia32_undef256(); } /// Create a 256-bit integer vector with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 256-bit integer vector containing undefined values. 
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void) { return (__m256i)__builtin_ia32_undef256(); } /// Constructs a 256-bit floating-point vector of [4 x double] /// initialized with the specified double-precision floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD+VINSERTF128 /// instruction. /// /// \param __a /// A double-precision floating-point value used to initialize bits [255:192] /// of the result. /// \param __b /// A double-precision floating-point value used to initialize bits [191:128] /// of the result. /// \param __c /// A double-precision floating-point value used to initialize bits [127:64] /// of the result. /// \param __d /// A double-precision floating-point value used to initialize bits [63:0] /// of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd(double __a, double __b, double __c, double __d) { return __extension__ (__m256d){ __d, __c, __b, __a }; } /// Constructs a 256-bit floating-point vector of [8 x float] initialized /// with the specified single-precision floating-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A single-precision floating-point value used to initialize bits [255:224] /// of the result. /// \param __b /// A single-precision floating-point value used to initialize bits [223:192] /// of the result. /// \param __c /// A single-precision floating-point value used to initialize bits [191:160] /// of the result. /// \param __d /// A single-precision floating-point value used to initialize bits [159:128] /// of the result. /// \param __e /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \param __f /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. 
/// \param __g /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __h /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; } /// Constructs a 256-bit integer vector initialized with the specified /// 32-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \param __i1 /// A 32-bit integral value used to initialize bits [223:192] of the result. /// \param __i2 /// A 32-bit integral value used to initialize bits [191:160] of the result. /// \param __i3 /// A 32-bit integral value used to initialize bits [159:128] of the result. /// \param __i4 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \param __i5 /// A 32-bit integral value used to initialize bits [95:64] of the result. /// \param __i6 /// A 32-bit integral value used to initialize bits [63:32] of the result. /// \param __i7 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; } /// Constructs a 256-bit integer vector initialized with the specified /// 16-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. 
/// /// \param __w15 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \param __w14 /// A 16-bit integral value used to initialize bits [239:224] of the result. /// \param __w13 /// A 16-bit integral value used to initialize bits [223:208] of the result. /// \param __w12 /// A 16-bit integral value used to initialize bits [207:192] of the result. /// \param __w11 /// A 16-bit integral value used to initialize bits [191:176] of the result. /// \param __w10 /// A 16-bit integral value used to initialize bits [175:160] of the result. /// \param __w09 /// A 16-bit integral value used to initialize bits [159:144] of the result. /// \param __w08 /// A 16-bit integral value used to initialize bits [143:128] of the result. /// \param __w07 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \param __w06 /// A 16-bit integral value used to initialize bits [111:96] of the result. /// \param __w05 /// A 16-bit integral value used to initialize bits [95:80] of the result. /// \param __w04 /// A 16-bit integral value used to initialize bits [79:64] of the result. /// \param __w03 /// A 16-bit integral value used to initialize bits [63:48] of the result. /// \param __w02 /// A 16-bit integral value used to initialize bits [47:32] of the result. /// \param __w01 /// A 16-bit integral value used to initialize bits [31:16] of the result. /// \param __w00 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \returns An initialized 256-bit integer vector. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) { return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; } /// Constructs a 256-bit integer vector initialized with the specified /// 8-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b31 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \param __b30 /// An 8-bit integral value used to initialize bits [247:240] of the result. /// \param __b29 /// An 8-bit integral value used to initialize bits [239:232] of the result. /// \param __b28 /// An 8-bit integral value used to initialize bits [231:224] of the result. /// \param __b27 /// An 8-bit integral value used to initialize bits [223:216] of the result. /// \param __b26 /// An 8-bit integral value used to initialize bits [215:208] of the result. /// \param __b25 /// An 8-bit integral value used to initialize bits [207:200] of the result. /// \param __b24 /// An 8-bit integral value used to initialize bits [199:192] of the result. /// \param __b23 /// An 8-bit integral value used to initialize bits [191:184] of the result. /// \param __b22 /// An 8-bit integral value used to initialize bits [183:176] of the result. /// \param __b21 /// An 8-bit integral value used to initialize bits [175:168] of the result. /// \param __b20 /// An 8-bit integral value used to initialize bits [167:160] of the result. /// \param __b19 /// An 8-bit integral value used to initialize bits [159:152] of the result. /// \param __b18 /// An 8-bit integral value used to initialize bits [151:144] of the result. 
/// \param __b17 /// An 8-bit integral value used to initialize bits [143:136] of the result. /// \param __b16 /// An 8-bit integral value used to initialize bits [135:128] of the result. /// \param __b15 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \param __b14 /// An 8-bit integral value used to initialize bits [119:112] of the result. /// \param __b13 /// An 8-bit integral value used to initialize bits [111:104] of the result. /// \param __b12 /// An 8-bit integral value used to initialize bits [103:96] of the result. /// \param __b11 /// An 8-bit integral value used to initialize bits [95:88] of the result. /// \param __b10 /// An 8-bit integral value used to initialize bits [87:80] of the result. /// \param __b09 /// An 8-bit integral value used to initialize bits [79:72] of the result. /// \param __b08 /// An 8-bit integral value used to initialize bits [71:64] of the result. /// \param __b07 /// An 8-bit integral value used to initialize bits [63:56] of the result. /// \param __b06 /// An 8-bit integral value used to initialize bits [55:48] of the result. /// \param __b05 /// An 8-bit integral value used to initialize bits [47:40] of the result. /// \param __b04 /// An 8-bit integral value used to initialize bits [39:32] of the result. /// \param __b03 /// An 8-bit integral value used to initialize bits [31:24] of the result. /// \param __b02 /// An 8-bit integral value used to initialize bits [23:16] of the result. /// \param __b01 /// An 8-bit integral value used to initialize bits [15:8] of the result. /// \param __b00 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \returns An initialized 256-bit integer vector. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) { return __extension__ (__m256i)(__v32qi){ __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31 }; } /// Constructs a 256-bit integer vector initialized with the specified /// 64-bit integral values. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128 /// instruction. /// /// \param __a /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \param __b /// A 64-bit integral value used to initialize bits [191:128] of the result. /// \param __c /// A 64-bit integral value used to initialize bits [127:64] of the result. /// \param __d /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) { return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a }; } /* Create vectors with elements in reverse order */ /// Constructs a 256-bit floating-point vector of [4 x double], /// initialized in reverse order with the specified double-precision /// floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD+VINSERTF128 /// instruction. /// /// \param __a /// A double-precision floating-point value used to initialize bits [63:0] /// of the result. 
/// \param __b /// A double-precision floating-point value used to initialize bits [127:64] /// of the result. /// \param __c /// A double-precision floating-point value used to initialize bits [191:128] /// of the result. /// \param __d /// A double-precision floating-point value used to initialize bits [255:192] /// of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd(double __a, double __b, double __c, double __d) { return _mm256_set_pd(__d, __c, __b, __a); } /// Constructs a 256-bit floating-point vector of [8 x float], /// initialized in reverse order with the specified single-precision /// float-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __a /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \param __b /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __c /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __d /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \param __e /// A single-precision floating-point value used to initialize bits [159:128] /// of the result. /// \param __f /// A single-precision floating-point value used to initialize bits [191:160] /// of the result. /// \param __g /// A single-precision floating-point value used to initialize bits [223:192] /// of the result. /// \param __h /// A single-precision floating-point value used to initialize bits [255:224] /// of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 32-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \param __i1 /// A 32-bit integral value used to initialize bits [63:32] of the result. /// \param __i2 /// A 32-bit integral value used to initialize bits [95:64] of the result. /// \param __i3 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \param __i4 /// A 32-bit integral value used to initialize bits [159:128] of the result. /// \param __i5 /// A 32-bit integral value used to initialize bits [191:160] of the result. /// \param __i6 /// A 32-bit integral value used to initialize bits [223:192] of the result. /// \param __i7 /// A 32-bit integral value used to initialize bits [255:224] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) { return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 16-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w15 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \param __w14 /// A 16-bit integral value used to initialize bits [31:16] of the result. /// \param __w13 /// A 16-bit integral value used to initialize bits [47:32] of the result. 
/// \param __w12 /// A 16-bit integral value used to initialize bits [63:48] of the result. /// \param __w11 /// A 16-bit integral value used to initialize bits [79:64] of the result. /// \param __w10 /// A 16-bit integral value used to initialize bits [95:80] of the result. /// \param __w09 /// A 16-bit integral value used to initialize bits [111:96] of the result. /// \param __w08 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \param __w07 /// A 16-bit integral value used to initialize bits [143:128] of the result. /// \param __w06 /// A 16-bit integral value used to initialize bits [159:144] of the result. /// \param __w05 /// A 16-bit integral value used to initialize bits [175:160] of the result. /// \param __w04 /// A 16-bit integral value used to initialize bits [191:176] of the result. /// \param __w03 /// A 16-bit integral value used to initialize bits [207:192] of the result. /// \param __w02 /// A 16-bit integral value used to initialize bits [223:208] of the result. /// \param __w01 /// A 16-bit integral value used to initialize bits [239:224] of the result. /// \param __w00 /// A 16-bit integral value used to initialize bits [255:240] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) { return _mm256_set_epi16(__w00, __w01, __w02, __w03, __w04, __w05, __w06, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 8-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. 
/// /// \param __b31 /// An 8-bit integral value used to initialize bits [7:0] of the result. /// \param __b30 /// An 8-bit integral value used to initialize bits [15:8] of the result. /// \param __b29 /// An 8-bit integral value used to initialize bits [23:16] of the result. /// \param __b28 /// An 8-bit integral value used to initialize bits [31:24] of the result. /// \param __b27 /// An 8-bit integral value used to initialize bits [39:32] of the result. /// \param __b26 /// An 8-bit integral value used to initialize bits [47:40] of the result. /// \param __b25 /// An 8-bit integral value used to initialize bits [55:48] of the result. /// \param __b24 /// An 8-bit integral value used to initialize bits [63:56] of the result. /// \param __b23 /// An 8-bit integral value used to initialize bits [71:64] of the result. /// \param __b22 /// An 8-bit integral value used to initialize bits [79:72] of the result. /// \param __b21 /// An 8-bit integral value used to initialize bits [87:80] of the result. /// \param __b20 /// An 8-bit integral value used to initialize bits [95:88] of the result. /// \param __b19 /// An 8-bit integral value used to initialize bits [103:96] of the result. /// \param __b18 /// An 8-bit integral value used to initialize bits [111:104] of the result. /// \param __b17 /// An 8-bit integral value used to initialize bits [119:112] of the result. /// \param __b16 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \param __b15 /// An 8-bit integral value used to initialize bits [135:128] of the result. /// \param __b14 /// An 8-bit integral value used to initialize bits [143:136] of the result. /// \param __b13 /// An 8-bit integral value used to initialize bits [151:144] of the result. /// \param __b12 /// An 8-bit integral value used to initialize bits [159:152] of the result. /// \param __b11 /// An 8-bit integral value used to initialize bits [167:160] of the result. 
/// \param __b10 /// An 8-bit integral value used to initialize bits [175:168] of the result. /// \param __b09 /// An 8-bit integral value used to initialize bits [183:176] of the result. /// \param __b08 /// An 8-bit integral value used to initialize bits [191:184] of the result. /// \param __b07 /// An 8-bit integral value used to initialize bits [199:192] of the result. /// \param __b06 /// An 8-bit integral value used to initialize bits [207:200] of the result. /// \param __b05 /// An 8-bit integral value used to initialize bits [215:208] of the result. /// \param __b04 /// An 8-bit integral value used to initialize bits [223:216] of the result. /// \param __b03 /// An 8-bit integral value used to initialize bits [231:224] of the result. /// \param __b02 /// An 8-bit integral value used to initialize bits [239:232] of the result. /// \param __b01 /// An 8-bit integral value used to initialize bits [247:240] of the result. /// \param __b00 /// An 8-bit integral value used to initialize bits [255:248] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) { return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15, __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23, __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31); } /// Constructs a 256-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128 /// instruction. /// /// \param __a /// A 64-bit integral value used to initialize bits [63:0] of the result. /// \param __b /// A 64-bit integral value used to initialize bits [127:64] of the result. /// \param __c /// A 64-bit integral value used to initialize bits [191:128] of the result. /// \param __d /// A 64-bit integral value used to initialize bits [255:192] of the result. /// \returns An initialized 256-bit integer vector. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) { return _mm256_set_epi64x(__d, __c, __b, __a); } /* Create vectors with repeated elements */ /// Constructs a 256-bit floating-point vector of [4 x double], with each /// of the four double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP+VINSERTF128 instruction. /// /// \param __w /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd(double __w) { return _mm256_set_pd(__w, __w, __w, __w); } /// Constructs a 256-bit floating-point vector of [8 x float], with each /// of the eight single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS+VINSERTF128 /// instruction. /// /// \param __w /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps(float __w) { return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w); } /// Constructs a 256-bit integer vector of [8 x i32], with each of the /// 32-bit integral vector elements set to the specified 32-bit integral /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS+VINSERTF128 /// instruction. /// /// \param __i /// A 32-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [8 x i32]. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i) { return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i); } /// Constructs a 256-bit integer vector of [16 x i16], with each of the /// 16-bit integral vector elements set to the specified 16-bit integral /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSHUFB+VINSERTF128 instruction. /// /// \param __w /// A 16-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [16 x i16]. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w) { return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w); } /// Constructs a 256-bit integer vector of [32 x i8], with each of the /// 8-bit integral vector elements set to the specified 8-bit integral value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSHUFB+VINSERTF128 instruction. /// /// \param __b /// An 8-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [32 x i8]. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b) { return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 256-bit integer vector of [4 x i64], with each of the /// 64-bit integral vector elements set to the specified 64-bit integral /// value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP+VINSERTF128 instruction. /// /// \param __q /// A 64-bit integral value used to initialize each vector element of the /// result. /// \returns An initialized 256-bit integer vector of [4 x i64]. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q) { return _mm256_set_epi64x(__q, __q, __q, __q); } /* Create __zeroed vectors */ /// Constructs a 256-bit floating-point vector of [4 x double] with all /// vector elements initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [4 x double] with all elements set to zero. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void) { return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 }; } /// Constructs a 256-bit floating-point vector of [8 x float] with all /// vector elements initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [8 x float] with all elements set to zero. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void) { return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; } /// Constructs a 256-bit integer vector initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit integer vector initialized to zero. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void) { return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 }; } /* Cast between vector types */ /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit /// floating-point vector of [8 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castpd_ps(__m256d __a) { return (__m256)__a; } /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castpd_si256(__m256d __a) { return (__m256i)__a; } /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit /// floating-point vector of [4 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castps_pd(__m256 __a) { return (__m256d)__a; } /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [8 x float]. /// \returns A 256-bit integer vector containing the same bitwise pattern as the /// parameter. 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castps_si256(__m256 __a) { return (__m256i)__a; } /// Casts a 256-bit integer vector into a 256-bit floating-point vector /// of [8 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [8 x float] containing the same /// bitwise pattern as the parameter. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castsi256_ps(__m256i __a) { return (__m256)__a; } /// Casts a 256-bit integer vector into a 256-bit floating-point vector /// of [4 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit floating-point vector of [4 x double] containing the same /// bitwise pattern as the parameter. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castsi256_pd(__m256i __a) { return (__m256d)__a; } /// Returns the lower 128 bits of a 256-bit floating-point vector of /// [4 x double] as a 128-bit floating-point vector of [2 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [4 x double]. /// \returns A 128-bit floating-point vector of [2 x double] containing the /// lower 128 bits of the parameter. static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1); } /// Returns the lower 128 bits of a 256-bit floating-point vector of /// [8 x float] as a 128-bit floating-point vector of [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit floating-point vector of [8 x float]. /// \returns A 128-bit floating-point vector of [4 x float] containing the /// lower 128 bits of the parameter. 
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3); } /// Truncates a 256-bit integer vector into a 128-bit integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 256-bit integer vector. /// \returns A 128-bit integer vector containing the lower 128 bits of the /// parameter. static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1); } /// Constructs a 256-bit floating-point vector of [4 x double] from a /// 128-bit floating-point vector of [2 x double]. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits /// contain the value of the parameter. The contents of the upper 128 bits /// are undefined. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a) { return __builtin_shufflevector( (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits /// contain the value of the parameter. The contents of the upper 128 bits /// are undefined. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_nondeterministic_value(__a), 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit integer vector from a 128-bit integer vector. /// /// The lower 128 bits contain the value of the source vector. The contents /// of the upper 128 bits are undefined. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 256-bit integer vector. The lower 128 bits contain the value of /// the parameter. The contents of the upper 128 bits are undefined. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a) { return __builtin_shufflevector( (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [4 x double] from a /// 128-bit floating-point vector of [2 x double]. The lower 128 bits /// contain the value of the source vector. The upper 128 bits are set /// to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits /// contain the value of the parameter. The upper 128 bits are set to zero. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_zextpd128_pd256(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] from a /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain /// the value of the source vector. The upper 128 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 256-bit floating-point vector of [8 x float]. 
The lower 128 bits /// contain the value of the parameter. The upper 128 bits are set to zero. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_zextps128_ps256(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit integer vector from a 128-bit integer vector. /// The lower 128 bits contain the value of the source vector. The upper /// 128 bits are set to zero. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 256-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 128 bits are set to zero. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_zextsi128_si256(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3); } /* Vector insert. We use macros rather than inlines because we only want to accept invocations where the immediate M is a constant expression. */ /// Constructs a new 256-bit vector of [8 x float] by first duplicating /// a 256-bit vector of [8 x float] given in the first parameter, and then /// replacing either the upper or the lower 128 bits with the contents of a /// 128-bit vector of [4 x float] in the second parameter. /// /// The immediate integer parameter determines between the upper or the lower /// 128 bits. /// /// \headerfile /// /// \code /// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param V1 /// A 256-bit vector of [8 x float]. This vector is copied to the result /// first, and then either the upper or the lower 128 bits of the result will /// be replaced by the contents of \a V2. /// \param V2 /// A 128-bit vector of [4 x float]. The contents of this parameter are /// written to either the upper or the lower 128 bits of the result depending /// on the value of parameter \a M. 
/// \param M /// An immediate integer. The least significant bit determines how the values /// from the two parameters are interleaved: \n /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result, /// and bits [255:128] of \a V1 are copied to bits [255:128] of the /// result. \n /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. #define _mm256_insertf128_ps(V1, V2, M) \ ((__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \ (__v4sf)(__m128)(V2), (int)(M))) /// Constructs a new 256-bit vector of [4 x double] by first duplicating /// a 256-bit vector of [4 x double] given in the first parameter, and then /// replacing either the upper or the lower 128 bits with the contents of a /// 128-bit vector of [2 x double] in the second parameter. /// /// The immediate integer parameter determines between the upper or the lower /// 128 bits. /// /// \headerfile /// /// \code /// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param V1 /// A 256-bit vector of [4 x double]. This vector is copied to the result /// first, and then either the upper or the lower 128 bits of the result will /// be replaced by the contents of \a V2. /// \param V2 /// A 128-bit vector of [2 x double]. The contents of this parameter are /// written to either the upper or the lower 128 bits of the result depending /// on the value of parameter \a M. /// \param M /// An immediate integer. The least significant bit determines how the values /// from the two parameters are interleaved: \n /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result, /// and bits [255:128] of \a V1 are copied to bits [255:128] of the /// result. 
\n /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit vector of [4 x double] containing the interleaved values. #define _mm256_insertf128_pd(V1, V2, M) \ ((__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \ (__v2df)(__m128d)(V2), (int)(M))) /// Constructs a new 256-bit integer vector by first duplicating a /// 256-bit integer vector given in the first parameter, and then replacing /// either the upper or the lower 128 bits with the contents of a 128-bit /// integer vector in the second parameter. /// /// The immediate integer parameter determines between the upper or the lower /// 128 bits. /// /// \headerfile /// /// \code /// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param V1 /// A 256-bit integer vector. This vector is copied to the result first, and /// then either the upper or the lower 128 bits of the result will be /// replaced by the contents of \a V2. /// \param V2 /// A 128-bit integer vector. The contents of this parameter are written to /// either the upper or the lower 128 bits of the result depending on the /// value of parameter \a M. /// \param M /// An immediate integer. The least significant bit determines how the values /// from the two parameters are interleaved: \n /// If bit [0] of \a M is 0, \a V2 are copied to bits [127:0] of the result, /// and bits [255:128] of \a V1 are copied to bits [255:128] of the /// result. \n /// If bit [0] of \a M is 1, \a V2 are copied to bits [255:128] of the /// result, and bits [127:0] of \a V1 are copied to bits [127:0] of the /// result. /// \returns A 256-bit integer vector containing the interleaved values. 
#define _mm256_insertf128_si256(V1, V2, M) \ ((__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \ (__v4si)(__m128i)(V2), (int)(M))) /* Vector extract. We use macros rather than inlines because we only want to accept invocations where the immediate M is a constant expression. */ /// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [8 x float], as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm256_extractf128_ps(__m256 V, const int M); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128 instruction. /// /// \param V /// A 256-bit vector of [8 x float]. /// \param M /// An immediate integer. The least significant bit determines which bits are /// extracted from the first parameter: \n /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [4 x float] containing the extracted bits. #define _mm256_extractf128_ps(V, M) \ ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))) /// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [4 x double], as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit vector of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm256_extractf128_pd(__m256d V, const int M); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128 instruction. /// /// \param V /// A 256-bit vector of [4 x double]. /// \param M /// An immediate integer. The least significant bit determines which bits are /// extracted from the first parameter: \n /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. 
/// \returns A 128-bit vector of [2 x double] containing the extracted bits. #define _mm256_extractf128_pd(V, M) \ ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))) /// Extracts either the upper or the lower 128 bits from a 256-bit /// integer vector, as determined by the immediate integer parameter, and /// returns the extracted bits as a 128-bit integer vector. /// /// \headerfile /// /// \code /// __m128i _mm256_extractf128_si256(__m256i V, const int M); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTF128 instruction. /// /// \param V /// A 256-bit integer vector. /// \param M /// An immediate integer. The least significant bit determines which bits are /// extracted from the first parameter: \n /// If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the /// result. \n /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit integer vector containing the extracted bits. #define _mm256_extractf128_si256(V, M) \ ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))) /// Constructs a 256-bit floating-point vector of [8 x float] by /// concatenating two 128-bit floating-point vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __hi /// A 128-bit floating-point vector of [4 x float] to be copied to the upper /// 128 bits of the result. /// \param __lo /// A 128-bit floating-point vector of [4 x float] to be copied to the lower /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 (__m128 __hi, __m128 __lo) { return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7); } /// Constructs a 256-bit floating-point vector of [4 x double] by /// concatenating two 128-bit floating-point vectors of [2 x double]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __hi /// A 128-bit floating-point vector of [2 x double] to be copied to the upper /// 128 bits of the result. /// \param __lo /// A 128-bit floating-point vector of [2 x double] to be copied to the lower /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d (__m128d __hi, __m128d __lo) { return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3); } /// Constructs a 256-bit integer vector by concatenating two 128-bit /// integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __hi /// A 128-bit integer vector to be copied to the upper 128 bits of the /// result. /// \param __lo /// A 128-bit integer vector to be copied to the lower 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i (__m128i __hi, __m128i __lo) { return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] by /// concatenating two 128-bit floating-point vectors of [4 x float]. This is /// similar to _mm256_set_m128, but the order of the input parameters is /// swapped. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __lo /// A 128-bit floating-point vector of [4 x float] to be copied to the lower /// 128 bits of the result. /// \param __hi /// A 128-bit floating-point vector of [4 x float] to be copied to the upper /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_m128 (__m128 __lo, __m128 __hi) { return _mm256_set_m128(__hi, __lo); } /// Constructs a 256-bit floating-point vector of [4 x double] by /// concatenating two 128-bit floating-point vectors of [2 x double]. This is /// similar to _mm256_set_m128d, but the order of the input parameters is /// swapped. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __lo /// A 128-bit floating-point vector of [2 x double] to be copied to the lower /// 128 bits of the result. /// \param __hi /// A 128-bit floating-point vector of [2 x double] to be copied to the upper /// 128 bits of the result. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d (__m128d __lo, __m128d __hi) { return (__m256d)_mm256_set_m128d(__hi, __lo); } /// Constructs a 256-bit integer vector by concatenating two 128-bit /// integer vectors. This is similar to _mm256_set_m128i, but the order of /// the input parameters is swapped. /// /// \headerfile /// /// This intrinsic corresponds to the VINSERTF128 instruction. /// /// \param __lo /// A 128-bit integer vector to be copied to the lower 128 bits of the /// result. /// \param __hi /// A 128-bit integer vector to be copied to the upper 128 bits of the /// result. /// \returns A 256-bit integer vector containing the concatenated result. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i (__m128i __lo, __m128i __hi) { return (__m256i)_mm256_set_m128i(__hi, __lo); } /* SIMD load ops (unaligned) */ /// Loads two 128-bit floating-point vectors of [4 x float] from /// unaligned memory locations and constructs a 256-bit floating-point vector /// of [8 x float] by concatenating the two 128-bit vectors. /// /// \headerfile /// /// This intrinsic corresponds to load instructions followed by the /// VINSERTF128 instruction. 
/// /// \param __addr_hi /// A pointer to a 128-bit memory location containing 4 consecutive /// single-precision floating-point values. These values are to be copied to /// bits[255:128] of the result. The address of the memory location does not /// have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location containing 4 consecutive /// single-precision floating-point values. These values are to be copied to /// bits[127:0] of the result. The address of the memory location does not /// have to be aligned. /// \returns A 256-bit floating-point vector of [8 x float] containing the /// concatenated result. static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo) { return _mm256_set_m128(_mm_loadu_ps(__addr_hi), _mm_loadu_ps(__addr_lo)); } /// Loads two 128-bit floating-point vectors of [2 x double] from /// unaligned memory locations and constructs a 256-bit floating-point vector /// of [4 x double] by concatenating the two 128-bit vectors. /// /// \headerfile /// /// This intrinsic corresponds to load instructions followed by the /// VINSERTF128 instruction. /// /// \param __addr_hi /// A pointer to a 128-bit memory location containing two consecutive /// double-precision floating-point values. These values are to be copied to /// bits[255:128] of the result. The address of the memory location does not /// have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location containing two consecutive /// double-precision floating-point values. These values are to be copied to /// bits[127:0] of the result. The address of the memory location does not /// have to be aligned. /// \returns A 256-bit floating-point vector of [4 x double] containing the /// concatenated result. 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) { return _mm256_set_m128d(_mm_loadu_pd(__addr_hi), _mm_loadu_pd(__addr_lo)); } /// Loads two 128-bit integer vectors from unaligned memory locations and /// constructs a 256-bit integer vector by concatenating the two 128-bit /// vectors. /// /// \headerfile /// /// This intrinsic corresponds to load instructions followed by the /// VINSERTF128 instruction. /// /// \param __addr_hi /// A pointer to a 128-bit memory location containing a 128-bit integer /// vector. This vector is to be copied to bits[255:128] of the result. The /// address of the memory location does not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location containing a 128-bit integer /// vector. This vector is to be copied to bits[127:0] of the result. The /// address of the memory location does not have to be aligned. /// \returns A 256-bit integer vector containing the concatenated result. static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu2_m128i(__m128i_u const *__addr_hi, __m128i_u const *__addr_lo) { return _mm256_set_m128i(_mm_loadu_si128(__addr_hi), _mm_loadu_si128(__addr_lo)); } /* SIMD store ops (unaligned) */ /// Stores the upper and lower 128 bits of a 256-bit floating-point /// vector of [8 x float] into two different unaligned memory locations. /// /// \headerfile /// /// This intrinsic corresponds to the VEXTRACTF128 instruction and the /// store instructions. /// /// \param __addr_hi /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __a /// A 256-bit floating-point vector of [8 x float]. 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a) { __m128 __v128; __v128 = _mm256_castps256_ps128(__a); _mm_storeu_ps(__addr_lo, __v128); __v128 = _mm256_extractf128_ps(__a, 1); _mm_storeu_ps(__addr_hi, __v128); } /// Stores the upper and lower 128 bits of a 256-bit floating-point /// vector of [4 x double] into two different unaligned memory locations. /// /// \headerfile /// /// This intrinsic corresponds to the VEXTRACTF128 instruction and the /// store instructions. /// /// \param __addr_hi /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __a /// A 256-bit floating-point vector of [4 x double]. static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) { __m128d __v128; __v128 = _mm256_castpd256_pd128(__a); _mm_storeu_pd(__addr_lo, __v128); __v128 = _mm256_extractf128_pd(__a, 1); _mm_storeu_pd(__addr_hi, __v128); } /// Stores the upper and lower 128 bits of a 256-bit integer vector into /// two different unaligned memory locations. /// /// \headerfile /// /// This intrinsic corresponds to the VEXTRACTF128 instruction and the /// store instructions. /// /// \param __addr_hi /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __addr_lo /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be /// copied to this memory location. The address of this memory location does /// not have to be aligned. /// \param __a /// A 256-bit integer vector. 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a) { __m128i __v128; __v128 = _mm256_castsi256_si128(__a); _mm_storeu_si128(__addr_lo, __v128); __v128 = _mm256_extractf128_si256(__a, 1); _mm_storeu_si128(__addr_hi, __v128); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS128 #endif /* __AVXINTRIN_H */ /builtins/avxneconvertintrin.h/*===-------------- avxneconvertintrin.h - AVXNECONVERT --------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif // __IMMINTRIN_H #ifdef __SSE2__ #ifndef __AVXNECONVERTINTRIN_H #define __AVXNECONVERTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ __min_vector_width__(256))) /// Convert scalar BF16 (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_bcstnebf16_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// b := Convert_BF16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 3 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_bcstnebf16_ps(const void *__A) { return (__m128)__builtin_ia32_vbcstnebf162ps128((const __bf16 *)__A); } /// Convert scalar BF16 (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_bcstnebf16_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// b := Convert_BF16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 7 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_bcstnebf16_ps(const void *__A) { return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A); } /// Convert scalar half-precision (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_bcstnesh_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNESH2PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// b := Convert_FP16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 3 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_bcstnesh_ps(const void *__A) { return (__m128)__builtin_ia32_vbcstnesh2ps128((const _Float16 *)__A); } /// Convert scalar half-precision (16-bit) floating-point element /// stored at memory locations starting at location \a __A to a /// single-precision (32-bit) floating-point, broadcast it to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_bcstnesh_ps(const void *__A); /// \endcode /// /// This intrinsic corresponds to the \c VBCSTNESH2PS instruction. /// /// \param __A /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// b := Convert_FP16_To_FP32(MEM[__A+15:__A]) /// FOR j := 0 to 7 /// m := j*32 /// dst[m+31:m] := b /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_bcstnesh_ps(const void *__A) { return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A); } /// Convert packed BF16 (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneebf16_ps(const __m128bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneebf16_ps(const __m128bh *__A) { return (__m128)__builtin_ia32_vcvtneebf162ps128((const __v8bf *)__A); } /// Convert packed BF16 (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneebf16_ps(const __m256bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneebf16_ps(const __m256bh *__A) { return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A); } /// Convert packed half-precision (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneeph_ps(const __m128h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneeph_ps(const __m128h *__A) { return (__m128)__builtin_ia32_vcvtneeph2ps128((const __v8hf *)__A); } /// Convert packed half-precision (16-bit) floating-point even-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneeph_ps(const __m256h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneeph_ps(const __m256h *__A) { return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A); } /// Convert packed BF16 (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneobf16_ps(const __m128bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneobf16_ps(const __m128bh *__A) { return (__m128)__builtin_ia32_vcvtneobf162ps128((const __v8bf *)__A); } /// Convert packed BF16 (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneobf16_ps(const __m256bh *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// BF16 (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneobf16_ps(const __m256bh *__A) { return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A); } /// Convert packed half-precision (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm_cvtneoph_ps(const __m128h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction. /// /// \param __A /// A pointer to a 128-bit memory location containing 8 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 128-bit vector of [4 x float]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtneoph_ps(const __m128h *__A) { return (__m128)__builtin_ia32_vcvtneoph2ps128((const __v8hf *)__A); } /// Convert packed half-precision (16-bit) floating-point odd-indexed elements /// stored at memory locations starting at location \a __A to packed /// single-precision (32-bit) floating-point elements, and store the results in /// \a dst. /// /// \headerfile /// /// \code /// _mm256_cvtneoph_ps(const __m256h *__A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction. /// /// \param __A /// A pointer to a 256-bit memory location containing 16 consecutive /// half-precision (16-bit) floating-point values. /// \returns /// A 256-bit vector of [8 x float]. /// /// \code{.operation} /// FOR j := 0 to 7 /// k := j*2+1 /// i := k*16 /// m := j*32 /// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtneoph_ps(const __m256h *__A) { return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A); } /// Convert packed single-precision (32-bit) floating-point elements in \a __A /// to packed BF16 (16-bit) floating-point elements, and store the results in \a /// dst. /// /// \headerfile /// /// \code /// _mm_cvtneps_avx_pbh(__m128 __A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction. /// /// \param __A /// A 128-bit vector of [4 x float]. /// \returns /// A 128-bit vector of [8 x bfloat]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtneps_avx_pbh(__m128 __A) { return (__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)__A); } /// Convert packed single-precision (32-bit) floating-point elements in \a __A /// to packed BF16 (16-bit) floating-point elements, and store the results in \a /// dst. /// /// \headerfile /// /// \code /// _mm256_cvtneps_avx_pbh(__m256 __A); /// \endcode /// /// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction. /// /// \param __A /// A 256-bit vector of [8 x float]. /// \returns /// A 128-bit vector of [8 x bfloat]. /// /// \code{.operation} /// FOR j := 0 to 7 /// dst.word[j] := Convert_FP32_To_BF16(a.fp32[j]) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_cvtneps_avx_pbh(__m256 __A) { return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXNECONVERTINTRIN_H #endif // __SSE2__ /builtins/avxvnniint16intrin.h/*===----------- avxvnniint16intrin.h - AVXVNNIINT16 intrinsics-------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __AVXVNNIINT16INTRIN_H #define __AVXVNNIINT16INTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \ __min_vector_width__(256))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUD instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwsud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUD instruction. 
/// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwsud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwsuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwsuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. 
Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpbusd_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUSD instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwusd128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUSD instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x short]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwusd256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 128-bit vector of [4 x int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x short]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwusds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. 
Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 256-bit vector of [8 x int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x short]. /// \returns /// A 256-bit vector of [8 x int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwusds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUUD instruction. /// /// \param __W /// A 128-bit vector of [4 x unsigned int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x unsigned int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwuud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWUUD instruction. /// /// \param __W /// A 256-bit vector of [8 x unsigned int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x unsigned int]. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwuud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. 
Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 128-bit vector of [4 x unsigned int]. /// \param __A /// A 128-bit vector of [8 x unsigned short]. /// \param __B /// A 128-bit vector of [8 x unsigned short]. /// \returns /// A 128-bit vector of [4 x unsigned int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwuuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. Sum these 2 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VPDPWSUDS instruction. /// /// \param __W /// A 256-bit vector of [8 x unsigned int]. /// \param __A /// A 256-bit vector of [16 x unsigned short]. /// \param __B /// A 256-bit vector of [16 x unsigned short]. /// \returns /// A 256-bit vector of [8 x unsigned int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) /// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwuuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXVNNIINT16INTRIN_H /*===-------- avxvnniint8intrin.h - AVXVNNIINT8 intrinsics -----------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use directly; include instead." #endif #ifndef __AVXVNNIINT8INTRIN_H #define __AVXVNNIINT8INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ __min_vector_width__(128))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbssd_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. 
/// \param __B /// A 128-bit vector of [16 x char]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssd_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbssd128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbssd256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. /// \param __B /// A 128-bit vector of [16 x char]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbssds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) /// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) /// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) /// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbssds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbsud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbsud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbsuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) /// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbsuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 128-bit vector of [16 x unsigned char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbuud128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBSSD instruction. /// /// \param __A /// A 256-bit vector of [32 x unsigned char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbuud256((__v8si)__W, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm_dpbuuds_epi32( __m128i __W, __m128i __A, __m128i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBUUDS instruction. /// /// \param __A /// A 128-bit vector of [16 x unsigned char]. /// \param __B /// A 128-bit vector of [16 x unsigned char]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuuds_epi32(__m128i __W, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbuuds128((__v4si)__W, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed /// 32-bit results in \a dst. /// /// \headerfile /// /// \code /// _mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B); /// \endcode /// /// This intrinsic corresponds to the \c VPDPBUUDS instruction. /// /// \param __A /// A 256-bit vector of [32 x unsigned char]. /// \param __B /// A 256-bit vector of [32 x unsigned char]. /// \returns /// A 256-bit vector of [8 x int]. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) /// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) /// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) /// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbuuds256((__v8si)__W, (__v8si)__A, (__v8si)__B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXVNNIINT8INTRIN_H /*===--------------- avxvnniintrin.h - VNNI intrinsics --------------------=== * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __AVXVNNIINTRIN_H #define __AVXVNNIINTRIN_H /* Below intrinsics defined in avx512vlvnniintrin.h can be used for AVXVNNI */ /// \fn __m256i _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m256i _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m256i _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m256i _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) /// \fn __m128i _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) /// \fn __m128i _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) /// \fn __m128i _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) /// \fn __m128i _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) /* Intrinsics with _avx_ prefix are for compatibility with msvc. */ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSD instructions. 
/// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. 
Sum these 2 results with the corresponding 32-bit integer in \a __S, /// and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSD instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a __S /// using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 7 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) /// ENDFOR /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSD instructions. 
/// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed /// 16-bit results. Sum these 4 results with the corresponding 32-bit integer /// in \a __S using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPBUSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) /// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) /// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) /// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. 
Sum these 2 results with the corresponding 32-bit integer in \a __S, /// and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSD instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit /// results. Sum these 2 results with the corresponding 32-bit integer in \a __S /// using signed saturation, and store the packed 32-bit results in DST. /// /// This intrinsic corresponds to the VPDPWSSDS instructions. /// /// \code{.operation} /// FOR j := 0 to 3 /// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) /// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) /// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) /// ENDFOR /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVXVNNIINTRIN_H /*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __BMI2INTRIN_H #define __BMI2INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits /// starting at bit number \a __Y. /// /// \code{.operation} /// i := __Y[7:0] /// result := __X /// IF i < 32 /// result[31:i] := 0 /// FI /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c BZHI instruction. /// /// \param __X /// The 32-bit source value to copy. /// \param __Y /// The lower 8 bits specify the bit number of the lowest bit to zero. /// \returns The partially zeroed 32-bit value. static __inline__ unsigned int __DEFAULT_FN_ATTRS _bzhi_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bzhi_si(__X, __Y); } /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X /// into the 32-bit result, according to the mask in the unsigned 32-bit /// integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 31 /// IF __Y[m] == 1 /// result[m] := __X[i] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PDEP instruction. /// /// \param __X /// The 32-bit source value to copy. /// \param __Y /// The 32-bit mask specifying where to deposit source bits. /// \returns The 32-bit result. 
static __inline__ unsigned int __DEFAULT_FN_ATTRS _pdep_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pdep_si(__X, __Y); } /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the /// low-order bits of the 32-bit result, according to the mask in the /// unsigned 32-bit integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 31 /// IF __Y[m] == 1 /// result[i] := __X[m] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PEXT instruction. /// /// \param __X /// The 32-bit source value to copy. /// \param __Y /// The 32-bit mask specifying which source bits to extract. /// \returns The 32-bit result. static __inline__ unsigned int __DEFAULT_FN_ATTRS _pext_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pext_si(__X, __Y); } /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a /// 64-bit product. Stores the upper 32 bits of the product in the /// memory at \a __P and returns the lower 32 bits. /// /// \code{.operation} /// Store32(__P, (__X * __Y)[63:32]) /// result := (__X * __Y)[31:0] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c MULX instruction. /// /// \param __X /// An unsigned 32-bit multiplicand. /// \param __Y /// An unsigned 32-bit multiplicand. /// \param __P /// A pointer to memory for storing the upper half of the product. /// \returns The lower half of the product. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) { unsigned long long __res = (unsigned long long) __X * __Y; *__P = (unsigned int)(__res >> 32); return (unsigned int)__res; } #ifdef __x86_64__ /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits /// starting at bit number \a __Y. 
/// /// \code{.operation} /// i := __Y[7:0] /// result := __X /// IF i < 64 /// result[63:i] := 0 /// FI /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c BZHI instruction. /// /// \param __X /// The 64-bit source value to copy. /// \param __Y /// The lower 8 bits specify the bit number of the lowest bit to zero. /// \returns The partially zeroed 64-bit value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bzhi_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bzhi_di(__X, __Y); } /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X /// into the 64-bit result, according to the mask in the unsigned 64-bit /// integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 63 /// IF __Y[m] == 1 /// result[m] := __X[i] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PDEP instruction. /// /// \param __X /// The 64-bit source value to copy. /// \param __Y /// The 64-bit mask specifying where to deposit source bits. /// \returns The 64-bit result. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _pdep_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pdep_di(__X, __Y); } /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the /// low-order bits of the 64-bit result, according to the mask in the /// unsigned 64-bit integer \a __Y. All other bits of the result are zero. /// /// \code{.operation} /// i := 0 /// result := 0 /// FOR m := 0 TO 63 /// IF __Y[m] == 1 /// result[i] := __X[m] /// i := i + 1 /// ENDIF /// ENDFOR /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c PEXT instruction. /// /// \param __X /// The 64-bit source value to copy. /// \param __Y /// The 64-bit mask specifying which source bits to extract. /// \returns The 64-bit result. 
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _pext_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pext_di(__X, __Y); } /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a /// 128-bit product. Stores the upper 64 bits of the product to the /// memory addressed by \a __P and returns the lower 64 bits. /// /// \code{.operation} /// Store64(__P, (__X * __Y)[127:64]) /// result := (__X * __Y)[63:0] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c MULX instruction. /// /// \param __X /// An unsigned 64-bit multiplicand. /// \param __Y /// An unsigned 64-bit multiplicand. /// \param __P /// A pointer to memory for storing the upper half of the product. /// \returns The lower half of the product. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mulx_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long *__P) { unsigned __int128 __res = (unsigned __int128) __X * __Y; *__P = (unsigned long long) (__res >> 64); return (unsigned long long) __res; } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __BMI2INTRIN_H */ /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __BMIINTRIN_H #define __BMIINTRIN_H /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT instruction behaves as BSF on non-BMI targets, there is code that expects to use it as a potentially faster version of BSF. 
*/ #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #define _tzcnt_u16(a) (__tzcnt_u16((a))) /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the TZCNT instruction. /// /// \param __X /// An unsigned 16-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 16-bit integer containing the number of trailing zero /// bits in the operand. static __inline__ unsigned short __RELAXED_FN_ATTRS __tzcnt_u16(unsigned short __X) { return __builtin_ia32_tzcnt_u16(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the TZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero /// bits in the operand. /// \see _mm_tzcnt_32 static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { return __builtin_ia32_tzcnt_u32(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the TZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An 32-bit integer containing the number of trailing zero bits in /// the operand. /// \see __tzcnt_u32 static __inline__ int __RELAXED_FN_ATTRS _mm_tzcnt_32(unsigned int __X) { return (int)__builtin_ia32_tzcnt_u32(__X); } #define _tzcnt_u32(a) (__tzcnt_u32((a))) #ifdef __x86_64__ /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the TZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero /// bits in the operand. 
/// \see _mm_tzcnt_64 static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { return __builtin_ia32_tzcnt_u64(__X); } /// Counts the number of trailing zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the TZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An 64-bit integer containing the number of trailing zero bits in /// the operand. /// \see __tzcnt_u64 static __inline__ long long __RELAXED_FN_ATTRS _mm_tzcnt_64(unsigned long long __X) { return (long long)__builtin_ia32_tzcnt_u64(__X); } #define _tzcnt_u64(a) (__tzcnt_u64((a))) #endif /* __x86_64__ */ #undef __RELAXED_FN_ATTRS #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__BMI__) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) #define _andn_u32(a, b) (__andn_u32((a), (b))) /* _bextr_u32 != __bextr_u32 */ #define _blsi_u32(a) (__blsi_u32((a))) #define _blsmsk_u32(a) (__blsmsk_u32((a))) #define _blsr_u32(a) (__blsr_u32((a))) /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the ANDN instruction. /// /// \param __X /// An unsigned integer containing one of the operands. /// \param __Y /// An unsigned integer containing one of the operands. /// \returns An unsigned integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. static __inline__ unsigned int __DEFAULT_FN_ATTRS __andn_u32(unsigned int __X, unsigned int __Y) { return ~__X & __Y; } /* AMD-specified, double-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. 
/// /// \headerfile /// /// This intrinsic corresponds to the BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify which bits are extracted. Bits [7:0] /// specify the index of the least significant bit. Bits [15:8] specify the /// number of bits to be extracted. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. /// \see _bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __bextr_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32(__X, __Y); } /* Intel-specified, single-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify the index of the least significant /// bit for the bits to be extracted. Bits [7:0] specify the index. /// \param __Z /// An unsigned integer used to specify the number of bits to be extracted. /// Bits [7:0] specify the number of bits. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } /* Intel-specified, single-leading-underscore version of BEXTR2 */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the BEXTR instruction. /// /// \param __X /// An unsigned integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify which bits are extracted. 
Bits [7:0] /// specify the index of the least significant bit. Bits [15:8] specify the /// number of bits to be extracted. /// \returns An unsigned integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _bextr2_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32(__X, __Y); } /// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the BLSI instruction. /// /// \param __X /// An unsigned integer whose bits are to be cleared. /// \returns An unsigned integer containing the result of clearing the bits from /// the source operand. static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsi_u32(unsigned int __X) { return __X & -__X; } /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the BLSMSK instruction. /// /// \param __X /// An unsigned integer used to create the mask. /// \returns An unsigned integer containing the newly created mask. static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsmsk_u32(unsigned int __X) { return __X ^ (__X - 1); } /// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the BLSR instruction. /// /// \param __X /// An unsigned integer containing the operand to be cleared. /// \returns An unsigned integer containing the result of clearing the source /// operand. 
static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsr_u32(unsigned int __X) { return __X & (__X - 1); } #ifdef __x86_64__ #define _andn_u64(a, b) (__andn_u64((a), (b))) /* _bextr_u64 != __bextr_u64 */ #define _blsi_u64(a) (__blsi_u64((a))) #define _blsmsk_u64(a) (__blsmsk_u64((a))) #define _blsr_u64(a) (__blsr_u64((a))) /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the ANDN instruction. /// /// \param __X /// An unsigned 64-bit integer containing one of the operands. /// \param __Y /// An unsigned 64-bit integer containing one of the operands. /// \returns An unsigned 64-bit integer containing the bitwise AND of the second /// operand with the one's complement of the first operand. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __andn_u64 (unsigned long long __X, unsigned long long __Y) { return ~__X & __Y; } /* AMD-specified, double-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. /// \param __Y /// An unsigned 64-bit integer used to specify which bits are extracted. Bits /// [7:0] specify the index of the least significant bit. Bits [15:8] specify /// the number of bits to be extracted. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. /// \see _bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS __bextr_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64(__X, __Y); } /* Intel-specified, single-leading-underscore version of BEXTR */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. 
/// /// \headerfile /// /// This intrinsic corresponds to the BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. /// \param __Y /// An unsigned integer used to specify the index of the least significant /// bit for the bits to be extracted. Bits [7:0] specify the index. /// \param __Z /// An unsigned integer used to specify the number of bits to be extracted. /// Bits [7:0] specify the number of bits. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } /* Intel-specified, single-leading-underscore version of BEXTR2 */ /// Extracts the specified bits from the first operand and returns them /// in the least significant bits of the result. /// /// \headerfile /// /// This intrinsic corresponds to the BEXTR instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be extracted. /// \param __Y /// An unsigned 64-bit integer used to specify which bits are extracted. Bits /// [7:0] specify the index of the least significant bit. Bits [15:8] specify /// the number of bits to be extracted. /// \returns An unsigned 64-bit integer whose least significant bits contain the /// extracted bits. /// \see __bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _bextr2_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64(__X, __Y); } /// Clears all bits in the source except for the least significant bit /// containing a value of 1 and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the BLSI instruction. /// /// \param __X /// An unsigned 64-bit integer whose bits are to be cleared. 
/// \returns An unsigned 64-bit integer containing the result of clearing the /// bits from the source operand. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsi_u64(unsigned long long __X) { return __X & -__X; } /// Creates a mask whose bits are set to 1, using bit 0 up to and /// including the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the BLSMSK instruction. /// /// \param __X /// An unsigned 64-bit integer used to create the mask. /// \returns An unsigned 64-bit integer containing the newly created mask. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsmsk_u64(unsigned long long __X) { return __X ^ (__X - 1); } /// Clears the least significant bit that is set to 1 in the source /// operand and returns the result. /// /// \headerfile /// /// This intrinsic corresponds to the BLSR instruction. /// /// \param __X /// An unsigned 64-bit integer containing the operand to be cleared. /// \returns An unsigned 64-bit integer containing the result of clearing the /// source operand. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsr_u64(unsigned long long __X) { return __X & (__X - 1); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ || defined(__BMI__) */ #endif /* __BMIINTRIN_H */ /*===---- builtins.h - Standard header for extra builtins -----------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ /// Some legacy compilers have builtin definitions in a file named builtins.h. /// This header file has been added to allow compatibility with code that was /// written for those compilers. 
Code may have an include line for this file /// and to avoid an error an empty file with this name is provided. #ifndef __BUILTINS_H #define __BUILTINS_H #endif /* __BUILTINS_H */ /*===------ cet.h -Control-flow Enforcement Technology feature ------------=== * Add x86 feature with IBT and/or SHSTK bits to ELF program property if they * are enabled. Otherwise, contents in this header file are unused. This file * is mainly design for assembly source code which want to enable CET. * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CET_H #define __CET_H #ifdef __ASSEMBLER__ #ifndef __CET__ # define _CET_ENDBR #endif #ifdef __CET__ # ifdef __LP64__ # if __CET__ & 0x1 # define _CET_ENDBR endbr64 # else # define _CET_ENDBR # endif # else # if __CET__ & 0x1 # define _CET_ENDBR endbr32 # else # define _CET_ENDBR # endif # endif # ifdef __LP64__ # define __PROPERTY_ALIGN 3 # else # define __PROPERTY_ALIGN 2 # endif .pushsection ".note.gnu.property", "a" .p2align __PROPERTY_ALIGN .long 1f - 0f /* name length. */ .long 4f - 1f /* data length. */ /* NT_GNU_PROPERTY_TYPE_0. */ .long 5 /* note type. */ 0: .asciz "GNU" /* vendor name. */ 1: .p2align __PROPERTY_ALIGN /* GNU_PROPERTY_X86_FEATURE_1_AND. */ .long 0xc0000002 /* pr_type. */ .long 3f - 2f /* pr_datasz. */ 2: /* GNU_PROPERTY_X86_FEATURE_1_XXX. */ .long __CET__ 3: .p2align __PROPERTY_ALIGN 4: .popsection #endif #endif #endif /*===---- cetintrin.h - CET intrinsic --------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CETINTRIN_H #define __CETINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("shstk"))) static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) { __builtin_ia32_incsspd((unsigned int)__a); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) { __builtin_ia32_incsspq(__a); } #endif /* __x86_64__ */ #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) { __builtin_ia32_incsspq(__a); } #else /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) { __builtin_ia32_incsspd(__a); } #endif /* __x86_64__ */ static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) { return __builtin_ia32_rdsspd(__a); } static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd_i32(void) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wuninitialized" unsigned int t; return __builtin_ia32_rdsspd(t); #pragma clang diagnostic pop } #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) { return __builtin_ia32_rdsspq(__a); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq_i64(void) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wuninitialized" unsigned long long t; return __builtin_ia32_rdsspq(t); #pragma clang diagnostic pop } #endif /* __x86_64__ */ #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) { return __builtin_ia32_rdsspq(0); } #else /* __x86_64__ */ static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) { return __builtin_ia32_rdsspd(0); } #endif /* __x86_64__ */ static 
__inline__ void __DEFAULT_FN_ATTRS _saveprevssp(void) { __builtin_ia32_saveprevssp(); } static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) { __builtin_ia32_rstorssp(__p); } static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) { __builtin_ia32_wrssd(__a, __p); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) { __builtin_ia32_wrssq(__a, __p); } #endif /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) { __builtin_ia32_wrussd(__a, __p); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) { __builtin_ia32_wrussq(__a, __p); } #endif /* __x86_64__ */ static __inline__ void __DEFAULT_FN_ATTRS _setssbsy(void) { __builtin_ia32_setssbsy(); } static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) { __builtin_ia32_clrssbsy(__p); } #undef __DEFAULT_FN_ATTRS #endif /* __CETINTRIN_H */ /*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLDEMOTEINTRIN_H #define __CLDEMOTEINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("cldemote"))) /// Hint to hardware that the cache line that contains \p __P should be demoted /// from the cache closest to the processor core to a level more distant from /// the processor core. /// /// \headerfile /// /// This intrinsic corresponds to the CLDEMOTE instruction. 
static __inline__ void __DEFAULT_FN_ATTRS _cldemote(const void * __P) { __builtin_ia32_cldemote(__P); } #define _mm_cldemote(p) _cldemote(p) #undef __DEFAULT_FN_ATTRS #endif /*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLFLUSHOPTINTRIN_H #define __CLFLUSHOPTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt"))) /// Invalidates all levels of the cache hierarchy and flushes modified data to /// memory for the cache line specified by the address \a __m. /// /// \headerfile /// /// This intrinsic corresponds to the \c CLFLUSHOPT instruction. /// /// \param __m /// An address within the cache line to flush and invalidate. static __inline__ void __DEFAULT_FN_ATTRS _mm_clflushopt(void const * __m) { __builtin_ia32_clflushopt(__m); } #undef __DEFAULT_FN_ATTRS #endif /*===---- clwbintrin.h - CLWB intrinsic ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLWBINTRIN_H #define __CLWBINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb"))) /// Writes back to memory the cache line (if modified) that contains the /// linear address specified in \a __p from any level of the cache hierarchy in /// the cache coherence domain /// /// \headerfile /// /// This intrinsic corresponds to the CLWB instruction. /// /// \param __p /// A pointer to the memory location used to identify the cache line to be /// written back. static __inline__ void __DEFAULT_FN_ATTRS _mm_clwb(void const *__p) { __builtin_ia32_clwb(__p); } #undef __DEFAULT_FN_ATTRS #endif /*===----------------------- clzerointrin.h - CLZERO ----------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __CLZEROINTRIN_H #define __CLZEROINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("clzero"))) /// Zeroes out the cache line for the address \a __line. This uses a /// non-temporal store. Calling \c _mm_sfence() afterward might be needed /// to enforce ordering. /// /// \headerfile /// /// This intrinsic corresponds to the \c CLZERO instruction. /// /// \param __line /// An address within the cache line to zero out. static __inline__ void __DEFAULT_FN_ATTRS _mm_clzero (void * __line) { __builtin_ia32_clzero ((void *)__line); } #undef __DEFAULT_FN_ATTRS #endif /* __CLZEROINTRIN_H */ /*===--------------- cmpccxaddintrin.h - CMPCCXADD intrinsics--------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error \ "Never use directly; include instead." #endif // __X86GPRINTRIN_H #ifndef __CMPCCXADDINTRIN_H #define __CMPCCXADDINTRIN_H #ifdef __x86_64__ typedef enum { _CMPCCX_O, /* Overflow. */ _CMPCCX_NO, /* No overflow. */ _CMPCCX_B, /* Below. */ _CMPCCX_NB, /* Not below. */ _CMPCCX_Z, /* Zero. */ _CMPCCX_NZ, /* Not zero. */ _CMPCCX_BE, /* Below or equal. */ _CMPCCX_NBE, /* Neither below nor equal. */ _CMPCCX_S, /* Sign. */ _CMPCCX_NS, /* No sign. */ _CMPCCX_P, /* Parity. */ _CMPCCX_NP, /* No parity. */ _CMPCCX_L, /* Less. */ _CMPCCX_NL, /* Not less. */ _CMPCCX_LE, /* Less or equal. */ _CMPCCX_NLE, /* Neither less nor equal. */ } _CMPCCX_ENUM; /// Compares the value from the memory __A with the value of __B. If the /// specified condition __D is met, then add the third operand __C to the /// __A and write it into __A, else the value of __A is unchanged. The return /// value is the original value of __A. /// /// \headerfile /// /// This intrinsic corresponds to the \c CMPCCXADD instructions. /// /// \param __A /// __A pointer specifying the memory address. /// /// \param __B /// A integer operand. /// /// \param __C /// A integer operand. /// /// \param __D /// The specified condition. /// /// \returns a integer which is the original value of first operand. #define _cmpccxadd_epi32(__A, __B, __C, __D) \ ((int)(__builtin_ia32_cmpccxadd32((void *)(__A), (int)(__B), (int)(__C), \ (int)(__D)))) #define _cmpccxadd_epi64(__A, __B, __C, __D) \ ((long long)(__builtin_ia32_cmpccxadd64((void *)(__A), (long long)(__B), \ (long long)(__C), (int)(__D)))) #endif // __x86_64__ #endif // __CMPCCXADDINTRIN_H /*===---- cpuid.h - X86 cpu model detection --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CPUID_H #define __CPUID_H #if !(__x86_64__ || __i386__) #error this header is for x86 only #endif /* Responses identification request with %eax 0 */ /* AMD: "AuthenticAMD" */ #define signature_AMD_ebx 0x68747541 #define signature_AMD_edx 0x69746e65 #define signature_AMD_ecx 0x444d4163 /* CENTAUR: "CentaurHauls" */ #define signature_CENTAUR_ebx 0x746e6543 #define signature_CENTAUR_edx 0x48727561 #define signature_CENTAUR_ecx 0x736c7561 /* CYRIX: "CyrixInstead" */ #define signature_CYRIX_ebx 0x69727943 #define signature_CYRIX_edx 0x736e4978 #define signature_CYRIX_ecx 0x64616574 /* HYGON: "HygonGenuine" */ #define signature_HYGON_ebx 0x6f677948 #define signature_HYGON_edx 0x6e65476e #define signature_HYGON_ecx 0x656e6975 /* INTEL: "GenuineIntel" */ #define signature_INTEL_ebx 0x756e6547 #define signature_INTEL_edx 0x49656e69 #define signature_INTEL_ecx 0x6c65746e /* TM1: "TransmetaCPU" */ #define signature_TM1_ebx 0x6e617254 #define signature_TM1_edx 0x74656d73 #define signature_TM1_ecx 0x55504361 /* TM2: "GenuineTMx86" */ #define signature_TM2_ebx 0x756e6547 #define signature_TM2_edx 0x54656e69 #define signature_TM2_ecx 0x3638784d /* NSC: "Geode by NSC" */ #define signature_NSC_ebx 0x646f6547 #define signature_NSC_edx 0x79622065 #define signature_NSC_ecx 0x43534e20 /* NEXGEN: "NexGenDriven" */ #define signature_NEXGEN_ebx 0x4778654e #define signature_NEXGEN_edx 0x72446e65 #define signature_NEXGEN_ecx 0x6e657669 /* RISE: "RiseRiseRise" */ #define signature_RISE_ebx 0x65736952 #define signature_RISE_edx 0x65736952 #define signature_RISE_ecx 0x65736952 /* SIS: "SiS SiS SiS " */ #define signature_SIS_ebx 0x20536953 #define signature_SIS_edx 0x20536953 #define signature_SIS_ecx 0x20536953 /* UMC: "UMC UMC UMC " */ #define signature_UMC_ebx 0x20434d55 #define 
signature_UMC_edx 0x20434d55 #define signature_UMC_ecx 0x20434d55 /* VIA: "VIA VIA VIA " */ #define signature_VIA_ebx 0x20414956 #define signature_VIA_edx 0x20414956 #define signature_VIA_ecx 0x20414956 /* VORTEX: "Vortex86 SoC" */ #define signature_VORTEX_ebx 0x74726f56 #define signature_VORTEX_edx 0x36387865 #define signature_VORTEX_ecx 0x436f5320 /* Features in %ecx for leaf 1 */ #define bit_SSE3 0x00000001 #define bit_PCLMULQDQ 0x00000002 #define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */ #define bit_DTES64 0x00000004 #define bit_MONITOR 0x00000008 #define bit_DSCPL 0x00000010 #define bit_VMX 0x00000020 #define bit_SMX 0x00000040 #define bit_EIST 0x00000080 #define bit_TM2 0x00000100 #define bit_SSSE3 0x00000200 #define bit_CNXTID 0x00000400 #define bit_FMA 0x00001000 #define bit_CMPXCHG16B 0x00002000 #define bit_xTPR 0x00004000 #define bit_PDCM 0x00008000 #define bit_PCID 0x00020000 #define bit_DCA 0x00040000 #define bit_SSE41 0x00080000 #define bit_SSE4_1 bit_SSE41 /* for gcc compat */ #define bit_SSE42 0x00100000 #define bit_SSE4_2 bit_SSE42 /* for gcc compat */ #define bit_x2APIC 0x00200000 #define bit_MOVBE 0x00400000 #define bit_POPCNT 0x00800000 #define bit_TSCDeadline 0x01000000 #define bit_AESNI 0x02000000 #define bit_AES bit_AESNI /* for gcc compat */ #define bit_XSAVE 0x04000000 #define bit_OSXSAVE 0x08000000 #define bit_AVX 0x10000000 #define bit_F16C 0x20000000 #define bit_RDRND 0x40000000 /* Features in %edx for leaf 1 */ #define bit_FPU 0x00000001 #define bit_VME 0x00000002 #define bit_DE 0x00000004 #define bit_PSE 0x00000008 #define bit_TSC 0x00000010 #define bit_MSR 0x00000020 #define bit_PAE 0x00000040 #define bit_MCE 0x00000080 #define bit_CX8 0x00000100 #define bit_CMPXCHG8B bit_CX8 /* for gcc compat */ #define bit_APIC 0x00000200 #define bit_SEP 0x00000800 #define bit_MTRR 0x00001000 #define bit_PGE 0x00002000 #define bit_MCA 0x00004000 #define bit_CMOV 0x00008000 #define bit_PAT 0x00010000 #define bit_PSE36 0x00020000 #define bit_PSN 
0x00040000 #define bit_CLFSH 0x00080000 #define bit_DS 0x00200000 #define bit_ACPI 0x00400000 #define bit_MMX 0x00800000 #define bit_FXSR 0x01000000 #define bit_FXSAVE bit_FXSR /* for gcc compat */ #define bit_SSE 0x02000000 #define bit_SSE2 0x04000000 #define bit_SS 0x08000000 #define bit_HTT 0x10000000 #define bit_TM 0x20000000 #define bit_PBE 0x80000000 /* Features in %ebx for leaf 7 sub-leaf 0 */ #define bit_FSGSBASE 0x00000001 #define bit_SGX 0x00000004 #define bit_BMI 0x00000008 #define bit_HLE 0x00000010 #define bit_AVX2 0x00000020 #define bit_SMEP 0x00000080 #define bit_BMI2 0x00000100 #define bit_ENH_MOVSB 0x00000200 #define bit_INVPCID 0x00000400 #define bit_RTM 0x00000800 #define bit_MPX 0x00004000 #define bit_AVX512F 0x00010000 #define bit_AVX512DQ 0x00020000 #define bit_RDSEED 0x00040000 #define bit_ADX 0x00080000 #define bit_AVX512IFMA 0x00200000 #define bit_CLFLUSHOPT 0x00800000 #define bit_CLWB 0x01000000 #define bit_AVX512PF 0x04000000 #define bit_AVX512ER 0x08000000 #define bit_AVX512CD 0x10000000 #define bit_SHA 0x20000000 #define bit_AVX512BW 0x40000000 #define bit_AVX512VL 0x80000000 /* Features in %ecx for leaf 7 sub-leaf 0 */ #define bit_PREFTCHWT1 0x00000001 #define bit_AVX512VBMI 0x00000002 #define bit_PKU 0x00000004 #define bit_OSPKE 0x00000010 #define bit_WAITPKG 0x00000020 #define bit_AVX512VBMI2 0x00000040 #define bit_SHSTK 0x00000080 #define bit_GFNI 0x00000100 #define bit_VAES 0x00000200 #define bit_VPCLMULQDQ 0x00000400 #define bit_AVX512VNNI 0x00000800 #define bit_AVX512BITALG 0x00001000 #define bit_AVX512VPOPCNTDQ 0x00004000 #define bit_RDPID 0x00400000 #define bit_CLDEMOTE 0x02000000 #define bit_MOVDIRI 0x08000000 #define bit_MOVDIR64B 0x10000000 #define bit_ENQCMD 0x20000000 /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 #define bit_AVX5124FMAPS 0x00000008 #define bit_UINTR 0x00000020 #define bit_SERIALIZE 0x00004000 #define bit_TSXLDTRK 0x00010000 #define bit_PCONFIG 0x00040000 #define bit_IBT 
0x00100000 #define bit_AMXBF16 0x00400000 #define bit_AVX512FP16 0x00800000 #define bit_AMXTILE 0x01000000 #define bit_AMXINT8 0x02000000 /* Features in %eax for leaf 7 sub-leaf 1 */ #define bit_RAOINT 0x00000008 #define bit_AVXVNNI 0x00000010 #define bit_AVX512BF16 0x00000020 #define bit_CMPCCXADD 0x00000080 #define bit_AMXFP16 0x00200000 #define bit_HRESET 0x00400000 #define bit_AVXIFMA 0x00800000 /* Features in %edx for leaf 7 sub-leaf 1 */ #define bit_AVXVNNIINT8 0x00000010 #define bit_AVXNECONVERT 0x00000020 #define bit_PREFETCHI 0x00004000 /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT 0x00000001 #define bit_XSAVEC 0x00000002 #define bit_XSAVES 0x00000008 /* Features in %eax for leaf 0x14 sub-leaf 0 */ #define bit_PTWRITE 0x00000010 /* Features in %ecx for leaf 0x80000001 */ #define bit_LAHF_LM 0x00000001 #define bit_ABM 0x00000020 #define bit_LZCNT bit_ABM /* for gcc compat */ #define bit_SSE4a 0x00000040 #define bit_PRFCHW 0x00000100 #define bit_XOP 0x00000800 #define bit_LWP 0x00008000 #define bit_FMA4 0x00010000 #define bit_TBM 0x00200000 #define bit_MWAITX 0x20000000 /* Features in %edx for leaf 0x80000001 */ #define bit_MMXEXT 0x00400000 #define bit_LM 0x20000000 #define bit_3DNOWP 0x40000000 #define bit_3DNOW 0x80000000 /* Features in %ebx for leaf 0x80000008 */ #define bit_CLZERO 0x00000001 #define bit_RDPRU 0x00000010 #define bit_WBNOINVD 0x00000200 #if __i386__ #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf)) #define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf), "2"(__count)) #else /* x86-64 uses %rbx as the base register, so preserve it. 
*/ #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ __asm(" xchgq %%rbx,%q1\n" \ " cpuid\n" \ " xchgq %%rbx,%q1" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf)) #define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \ __asm(" xchgq %%rbx,%q1\n" \ " cpuid\n" \ " xchgq %%rbx,%q1" \ : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf), "2"(__count)) #endif static __inline unsigned int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig) { unsigned int __eax, __ebx, __ecx, __edx; #if __i386__ int __cpuid_supported; __asm(" pushfl\n" " popl %%eax\n" " movl %%eax,%%ecx\n" " xorl $0x00200000,%%eax\n" " pushl %%eax\n" " popfl\n" " pushfl\n" " popl %%eax\n" " movl $0,%0\n" " cmpl %%eax,%%ecx\n" " je 1f\n" " movl $1,%0\n" "1:" : "=r" (__cpuid_supported) : : "eax", "ecx"); if (!__cpuid_supported) return 0; #endif __cpuid(__leaf, __eax, __ebx, __ecx, __edx); if (__sig) *__sig = __ebx; return __eax; } static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0); if (__max_leaf == 0 || __max_leaf < __leaf) return 0; __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx); return 1; } static __inline int __get_cpuid_count (unsigned int __leaf, unsigned int __subleaf, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0); if (__max_leaf == 0 || __max_leaf < __leaf) return 0; __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx); return 1; } #endif /* __CPUID_H */ /*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CRC32INTRIN_H #define __CRC32INTRIN_H #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned char operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32B instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u8(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned short operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32W instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u16(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } /// Adds the first unsigned integer operand to the CRC-32C checksum of /// the second unsigned integer operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32L instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. 
/// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_crc32_u32(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); } #ifdef __x86_64__ /// Adds the unsigned integer operand to the CRC-32C checksum of the /// unsigned 64-bit integer operand. /// /// \headerfile /// /// This intrinsic corresponds to the CRC32Q instruction. /// /// \param __C /// An unsigned integer operand to add to the CRC-32C checksum of operand /// \a __D. /// \param __D /// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. /// \returns The result of adding operand \a __C to the CRC-32C checksum of /// operand \a __D. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm_crc32_u64(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __CRC32INTRIN_H */ /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __EMMINTRIN_H #define __EMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1))); /* Type defines. 
*/ typedef double __v2df __attribute__((__vector_size__(16))); typedef long long __v2di __attribute__((__vector_size__(16))); typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); /* Unsigned types */ typedef unsigned long long __v2du __attribute__((__vector_size__(16))); typedef unsigned short __v8hu __attribute__((__vector_size__(16))); typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ typedef signed char __v16qs __attribute__((__vector_size__(16))); #ifdef __SSE2__ /* Both _Float16 and __bf16 require SSE2 being enabled. */ typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16))); typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1))); typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16))); typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_MMX \ __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \ __min_vector_width__(64))) /// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result /// are copied from the upper double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSD / ADDSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. 
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// sum of the lower 64 bits of both operands. The upper 64 bits are copied /// from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b) { __a[0] += __b[0]; return __a; } /// Adds two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPD / ADDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the sums of both /// operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a + (__v2df)__b); } /// Subtracts the lower double-precision value of the second operand /// from the lower double-precision value of the first operand and returns /// the difference in the lower 64 bits of the result. The upper 64 bits of /// the result are copied from the upper double-precision value of the first /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBSD / SUBSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the minuend. /// \param __b /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// difference of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b) { __a[0] -= __b[0]; return __a; } /// Subtracts two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPD / SUBPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the minuend. 
/// \param __b /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] containing the differences between /// both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a - (__v2df)__b); } /// Multiplies lower double-precision values in both operands and returns /// the product in the lower 64 bits of the result. The upper 64 bits of the /// result are copied from the upper double-precision value of the first /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMULSD / MULSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// product of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b) { __a[0] *= __b[0]; return __a; } /// Multiplies two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPD / MULPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the products of both /// operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a * (__v2df)__b); } /// Divides the lower double-precision value of the first operand by the /// lower double-precision value of the second operand and returns the /// quotient in the lower 64 bits of the result. The upper 64 bits of the /// result are copied from the upper double-precision value of the first /// operand. 
/// /// \headerfile /// /// This intrinsic corresponds to the VDIVSD / DIVSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the dividend. /// \param __b /// A 128-bit vector of [2 x double] containing divisor. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// quotient of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b) { __a[0] /= __b[0]; return __a; } /// Performs an element-by-element division of two 128-bit vectors of /// [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPD / DIVPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the dividend. /// \param __b /// A 128-bit vector of [2 x double] containing the divisor. /// \returns A 128-bit vector of [2 x double] containing the quotients of both /// operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2df)__a / (__v2df)__b); } /// Calculates the square root of the lower double-precision value of /// the second operand and returns it in the lower 64 bits of the result. /// The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTSD / SQRTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. The /// upper 64 bits of this operand are copied to the upper 64 bits of the /// result. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. The /// square root is calculated using the lower 64 bits of this operand. 
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// square root of the lower 64 bits of operand \a __b, and whose upper 64 /// bits are copied from the upper 64 bits of operand \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); return __extension__(__m128d){__c[0], __a[1]}; } /// Calculates the square root of the each of two values stored in a /// 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPD / SQRTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { return __builtin_ia32_sqrtpd((__v2df)__a); } /// Compares lower 64-bit double-precision values of both operands, and /// returns the lesser of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMINSD / MINSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// minimum value between both operands. The upper 64 bits are copied from /// the upper 64 bits of the first source operand. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b) { return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); } /// Performs element-by-element comparison of the two 128-bit vectors of /// [2 x double] and returns the vector containing the lesser of each pair of /// values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPD / MINPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the minimum values /// between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b) { return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } /// Compares lower 64-bit double-precision values of both operands, and /// returns the greater of the pair of values in the lower 64-bits of the /// result. The upper 64 bits of the result are copied from the upper /// double-precision value of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXSD / MAXSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. The /// lower 64 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// maximum value between both operands. The upper 64 bits are copied from /// the upper 64 bits of the first source operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b) { return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); } /// Performs element-by-element comparison of the two 128-bit vectors of /// [2 x double] and returns the vector containing the greater of each pair /// of values. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMAXPD / MAXPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the maximum values /// between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b) { return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); } /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VPAND / PAND instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the /// values between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2du)__a & (__v2du)__b); } /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using /// the one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPANDN / PANDN instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the left source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the /// values in the second operand and the one's complement of the first /// operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b) { return (__m128d)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPOR / POR instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the /// values between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2du)__a | (__v2du)__b); } /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VPXOR / PXOR instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the /// values between both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b) { return (__m128d)((__v2du)__a ^ (__v2du)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 /// for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQPD / CMPEQPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are less than those in the second operand. Each comparison /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPD / CMPLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are less than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPD / CMPLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPD / CMPLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are greater than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPD / CMPLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are ordered with respect to those in the second operand. /// /// A pair of double-precision values are "ordered" with respect to each /// other if neither value is a NaN. Each comparison yields 0x0 for false, /// 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDPD / CMPORDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are unordered with respect to those in the second operand. 
/// /// A pair of double-precision values are "unordered" with respect to each /// other if one or both values are NaN. Each comparison yields 0x0 for /// false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDPD / CMPUNORDPD /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are unequal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQPD / CMPNEQPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPD / CMPNLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not less than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPD / CMPNLEPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPD / CMPNLTPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); } /// Compares each of the corresponding double-precision values of the /// 128-bit vectors of [2 x double] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPD / CMPNLEPD instruction. 
/// /// \param __a /// A 128-bit vector of [2 x double]. /// \param __b /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector containing the comparison results. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQSD / CMPEQSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSD / CMPLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. 
/// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESD / CMPLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSD / CMPLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. 
The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESD / CMPLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "ordered" with respect to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair /// of double-precision values are "ordered" with respect to each other if /// neither value is a NaN. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDSD / CMPORDSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is "unordered" with respect to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair /// of double-precision values are "unordered" with respect to each other if /// one or both values are NaN. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDSD / CMPUNORDSD /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQSD / CMPNEQSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than the corresponding /// value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSD / CMPNLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. 
The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not less than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESD / CMPNLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b) { return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than the corresponding /// value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSD / CMPNLTSD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. 
The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is not greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESD / CMPNLESD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns A 128-bit vector. The lower 64 bits contains the comparison /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b) { __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); return __extension__(__m128d){__c[0], __a[1]}; } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 0 is returned. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 1 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISD / COMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. 
/// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 1 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] for equality. The /// comparison yields 0 for false, 1 for true. /// /// If either of the two lower double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than the corresponding value in /// the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two lower /// double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. 
The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is less than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two lower /// double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than the corresponding value /// in the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two lower /// double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is greater than or equal to the /// corresponding value in the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two /// lower double-precision values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison results. If either of the two /// lower double-precision values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); } /// Compares the lower double-precision floating-point values in each of /// the two 128-bit floating-point vectors of [2 x double] to determine if /// the value in the first parameter is unequal to the corresponding value in /// the second parameter. /// /// The comparison yields 0 for false, 1 for true. If either of the two lower /// double-precision values is NaN, 1 is returned. 
/// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISD / UCOMISD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __b. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision value is /// compared to the lower double-precision value of \a __a. /// \returns An integer containing the comparison result. If either of the two /// lower double-precision values is NaN, 1 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b) { return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two single-precision floating-point /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. /// The upper 64 bits of the result vector are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2PS / CVTPD2PS instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { return __builtin_ia32_cvtpd2ps((__v2df)__a); } /// Converts the lower two single-precision floating-point elements of a /// 128-bit vector of [4 x float] into two double-precision floating-point /// values, returned in a 128-bit vector of [2 x double]. The upper two /// elements of the input vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2PD / CVTPS2PD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower two single-precision /// floating-point elements are converted to double-precision values. The /// upper two elements are unused. 
/// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } /// Converts the lower two integer elements of a 128-bit vector of /// [4 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. /// /// The upper two elements of the input vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PD / CVTDQ2PD instruction. /// /// \param __a /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are /// converted to double-precision values. /// /// The upper two elements are unused. /// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper /// 64 bits of the result vector are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPD2DQ / CVTPD2DQ instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the /// converted values. The upper 64 bits are set to zero. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { return __builtin_ia32_cvtpd2dq((__v2df)__a); } /// Converts the low-order element of a 128-bit vector of [2 x double] /// into a 32-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSD2SI / CVTSD2SI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower 64 bits are used in the /// conversion. /// \returns A 32-bit signed integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { return __builtin_ia32_cvtsd2si((__v2df)__a); } /// Converts the lower double-precision floating-point element of a /// 128-bit vector of [2 x double], in the second parameter, into a /// single-precision floating-point value, returned in the lower 32 bits of a /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are /// copied from the upper 96 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSD2SS / CVTSD2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are /// copied to the upper 96 bits of the result. /// \param __b /// A 128-bit vector of [2 x double]. The lower double-precision /// floating-point element is used in the conversion. /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the /// converted value from the second parameter. The upper 96 bits are copied /// from the upper 96 bits of the first parameter. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b) { return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); } /// Converts a 32-bit signed integer value, in the second parameter, into /// a double-precision floating-point value, returned in the lower 64 bits of /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector /// are copied from the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SD / CVTSI2SD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are /// copied to the upper 64 bits of the result. /// \param __b /// A 32-bit signed integer containing the value to be converted. /// \returns A 128-bit vector of [2 x double]. 
The lower 64 bits contain the /// converted value from the second parameter. The upper 64 bits are copied /// from the upper 64 bits of the first parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b) { __a[0] = __b; return __a; } /// Converts the lower single-precision floating-point element of a /// 128-bit vector of [4 x float], in the second parameter, into a /// double-precision floating-point value, returned in the lower 64 bits of /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector /// are copied from the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SD / CVTSS2SD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are /// copied to the upper 64 bits of the result. /// \param __b /// A 128-bit vector of [4 x float]. The lower single-precision /// floating-point element is used in the conversion. /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the /// converted value from the second parameter. The upper 64 bits are copied /// from the upper 64 bits of the first parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b) { __a[0] = __b[0]; return __a; } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. /// /// If the result of either conversion is inexact, the result is truncated /// (rounded towards zero) regardless of the current MXCSR setting. The upper /// 64 bits of the result vector are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPD2DQ / CVTTPD2DQ /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the /// converted values. 
The upper 64 bits are set to zero. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); } /// Converts the low-order element of a [2 x double] vector into a 32-bit /// signed integer value, truncating the result when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSD2SI / CVTTSD2SI /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the /// conversion. /// \returns A 32-bit signed integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { return __builtin_ia32_cvttsd2si((__v2df)__a); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPD2PI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. /// /// If the result of either conversion is inexact, the result is truncated /// (rounded towards zero) regardless of the current MXCSR setting. /// /// \headerfile /// /// This intrinsic corresponds to the CVTTPD2PI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); } /// Converts the two signed 32-bit integer elements of a 64-bit vector of /// [2 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32]. /// \returns A 128-bit vector of [2 x double] containing the converted values. static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { return __builtin_ia32_cvtpi2pd((__v2si)__a); } /// Returns the low-order element of a 128-bit vector of [2 x double] as /// a double-precision floating-point value. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. /// \returns A double-precision floating-point value copied from the lower 64 /// bits of \a __a. static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { return __a[0]; } /// Loads a 128-bit floating-point vector of [2 x double] from an aligned /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD / MOVAPD instruction. /// /// \param __dp /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 16-byte aligned. /// \returns A 128-bit vector of [2 x double] containing the loaded values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { return *(const __m128d *)__dp; } /// Loads a double-precision floating-point value from a specified memory /// location and duplicates it to both vector elements of a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP / MOVDDUP instruction. /// /// \param __dp /// A pointer to a memory location containing a double-precision value. 
/// \returns A 128-bit vector of [2 x double] containing the loaded and /// duplicated values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) { struct __mm_load1_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u; return __extension__(__m128d){__u, __u}; } #define _mm_load_pd1(dp) _mm_load1_pd(dp) /// Loads two double-precision values, in reverse order, from an aligned /// memory location into a 128-bit vector of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD / MOVAPD instruction + /// needed shuffling instructions. In AVX mode, the shuffling may be combined /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. /// /// \param __dp /// A 16-byte aligned pointer to an array of double-precision values to be /// loaded in reverse order. /// \returns A 128-bit vector of [2 x double] containing the reversed loaded /// values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { __m128d __u = *(const __m128d *)__dp; return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); } /// Loads a 128-bit floating-point vector of [2 x double] from an /// unaligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD / MOVUPD instruction. /// /// \param __dp /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [2 x double] containing the loaded values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { struct __loadu_pd { __m128d_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_pd *)__dp)->__v; } /// Loads a 64-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. 
/// /// \param __a /// A pointer to a 64-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [2 x i64] containing the loaded value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) { struct __loadu_si64 { long long __v; } __attribute__((__packed__, __may_alias__)); long long __u = ((const struct __loadu_si64 *)__a)->__v; return __extension__(__m128i)(__v2di){__u, 0LL}; } /// Loads a 32-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __a /// A pointer to a 32-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [4 x i32] containing the loaded value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) { struct __loadu_si32 { int __v; } __attribute__((__packed__, __may_alias__)); int __u = ((const struct __loadu_si32 *)__a)->__v; return __extension__(__m128i)(__v4si){__u, 0, 0, 0}; } /// Loads a 16-bit integer value to the low element of a 128-bit integer /// vector and clears the upper element. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __a /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [8 x i16] containing the loaded value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) { struct __loadu_si16 { short __v; } __attribute__((__packed__, __may_alias__)); short __u = ((const struct __loadu_si16 *)__a)->__v; return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; } /// Loads a 64-bit double-precision value to the low element of a /// 128-bit integer vector and clears the upper element. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMOVSD / MOVSD instruction. /// /// \param __dp /// A pointer to a memory location containing a double-precision value. /// The address of the memory location does not have to be aligned. /// \returns A 128-bit vector of [2 x double] containing the loaded value. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { struct __mm_load_sd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_load_sd_struct *)__dp)->__u; return __extension__(__m128d){__u, 0}; } /// Loads a double-precision value into the high-order bits of a 128-bit /// vector of [2 x double]. The low-order bits are copied from the low-order /// bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVHPD / MOVHPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [63:0] of the result. /// \param __dp /// A pointer to a 64-bit memory location containing a double-precision /// floating-point value that is loaded. The loaded value is written to bits /// [127:64] of the result. The address of the memory location does not have /// to be aligned. /// \returns A 128-bit vector of [2 x double] containing the moved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp) { struct __mm_loadh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u; return __extension__(__m128d){__a[0], __u}; } /// Loads a double-precision value into the low-order bits of a 128-bit /// vector of [2 x double]. The high-order bits are copied from the /// high-order bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPD / MOVLPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
\n /// Bits [127:64] are written to bits [127:64] of the result. /// \param __dp /// A pointer to a 64-bit memory location containing a double-precision /// floating-point value that is loaded. The loaded value is written to bits /// [63:0] of the result. The address of the memory location does not have to /// be aligned. /// \returns A 128-bit vector of [2 x double] containing the moved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp) { struct __mm_loadl_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u; return __extension__(__m128d){__u, __a[1]}; } /// Constructs a 128-bit floating-point vector of [2 x double] with /// unspecified content. This could be used as an argument to another /// intrinsic function where the argument is required but the value is not /// actually used. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit floating-point vector of [2 x double] with unspecified /// content. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) { return (__m128d)__builtin_ia32_undef128(); } /// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits of the vector are initialized with the specified double-precision /// floating-point value. The upper 64 bits are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __w /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. The /// lower 64 bits contain the value of the parameter. The upper 64 bits are /// set to zero. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { return __extension__(__m128d){__w, 0}; } /// Constructs a 128-bit floating-point vector of [2 x double], with each /// of the two double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP / MOVLHPS instruction. /// /// \param __w /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { return __extension__(__m128d){__w, __w}; } /// Constructs a 128-bit floating-point vector of [2 x double], with each /// of the two double-precision floating-point vector elements set to the /// specified double-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP / MOVLHPS instruction. /// /// \param __w /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { return _mm_set1_pd(__w); } /// Constructs a 128-bit floating-point vector of [2 x double] /// initialized with the specified double-precision floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __w /// A double-precision floating-point value used to initialize the upper 64 /// bits of the result. /// \param __x /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x) { return __extension__(__m128d){__x, __w}; } /// Constructs a 128-bit floating-point vector of [2 x double], /// initialized in reverse order with the specified double-precision /// floating-point values. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __w /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \param __x /// A double-precision floating-point value used to initialize the upper 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x) { return __extension__(__m128d){__w, __x}; } /// Constructs a 128-bit floating-point vector of [2 x double] /// initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instruction. /// /// \returns An initialized 128-bit floating-point vector of [2 x double] with /// all elements set to zero. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { return __extension__(__m128d){0.0, 0.0}; } /// Constructs a 128-bit floating-point vector of [2 x double]. The lower /// 64 bits are set to the lower 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDPD / BLENDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the /// upper 64 bits of the result. /// \param __b /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the /// lower 64 bits of the result. /// \returns A 128-bit vector of [2 x double] containing the moved values. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b) { __a[0] = __b[0]; return __a; } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSD / MOVSD instruction. /// /// \param __dp /// A pointer to a 64-bit memory location. /// \param __a /// A 128-bit vector of [2 x double] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a) { struct __mm_store_sd_struct { double __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_store_sd_struct *)__dp)->__u = __a[0]; } /// Moves packed double-precision values from a 128-bit vector of /// [2 x double] to a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPD / MOVAPS instruction. /// /// \param __dp /// A pointer to an aligned memory location that can store two /// double-precision values. /// \param __a /// A packed 128-bit vector of [2 x double] containing the values to be /// moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a) { *(__m128d *)__dp = __a; } /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to /// the upper and lower 64 bits of a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the /// VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS instruction. /// /// \param __dp /// A pointer to a memory location that can store two double-precision /// values. /// \param __a /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each /// of the values in \a __dp. static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a) { __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); _mm_store_pd(__dp, __a); } /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to /// the upper and lower 64 bits of a memory location. 
/// /// \headerfile /// /// This intrinsic corresponds to the /// VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS instruction. /// /// \param __dp /// A pointer to a memory location that can store two double-precision /// values. /// \param __a /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each /// of the values in \a __dp. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a) { _mm_store1_pd(__dp, __a); } /// Stores a 128-bit vector of [2 x double] into an unaligned memory /// location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPD / MOVUPD instruction. /// /// \param __dp /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a) { struct __storeu_pd { __m128d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd *)__dp)->__v = __a; } /// Stores two double-precision values, in reverse order, from a 128-bit /// vector of [2 x double] to a 16-byte aligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to a shuffling instruction followed by a /// VMOVAPD / MOVAPD instruction. /// /// \param __dp /// A pointer to a 16-byte aligned memory location that can store two /// double-precision values. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be reversed and /// stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a) { __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); *(__m128d *)__dp = __a; } /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVHPD / MOVHPD instruction. /// /// \param __dp /// A pointer to a 64-bit memory location. 
/// \param __a /// A 128-bit vector of [2 x double] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a) { struct __mm_storeh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1]; } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPD / MOVLPD instruction. /// /// \param __dp /// A pointer to a 64-bit memory location. /// \param __a /// A 128-bit vector of [2 x double] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a) { struct __mm_storeh_pd_struct { double __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0]; } /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], /// saving the lower 8 bits of each sum in the corresponding element of a /// 128-bit result vector of [16 x i8]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDB / PADDB instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. /// \param __b /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], /// saving the lower 16 bits of each sum in the corresponding element of a /// 128-bit result vector of [8 x i16]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDW / PADDW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. 
/// \param __b /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], /// saving the lower 32 bits of each sum in the corresponding element of a /// 128-bit result vector of [4 x i32]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDD / PADDD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. /// \param __b /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the sums of both /// parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a + (__v4su)__b); } /// Adds two signed or unsigned 64-bit integer values, returning the /// lower 64 bits of the sum. /// /// \headerfile /// /// This intrinsic corresponds to the PADDQ instruction. /// /// \param __a /// A 64-bit integer. /// \param __b /// A 64-bit integer. /// \returns A 64-bit integer containing the sum of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); } /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], /// saving the lower 64 bits of each sum in the corresponding element of a /// 128-bit result vector of [2 x i64]. /// /// The integer elements of both parameters can be either signed or unsigned. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDQ / PADDQ instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. /// \param __b /// A 128-bit vector of [2 x i64]. /// \returns A 128-bit vector of [2 x i64] containing the sums of both /// parameters. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a + (__v2du)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// signed [16 x i8] vectors, saving each sum in the corresponding element of /// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are /// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDSB / PADDSB instruction. /// /// \param __a /// A 128-bit signed [16 x i8] vector. /// \param __b /// A 128-bit signed [16 x i8] vector. /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of /// both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// signed [8 x i16] vectors, saving each sum in the corresponding element of /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to /// 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDSW / PADDSW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of /// both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// unsigned [16 x i8] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF /// are saturated to 0xFF. Negative sums are saturated to 0x00. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPADDUSB / PADDUSB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums /// of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); } /// Adds, with saturation, the corresponding elements of two 128-bit /// unsigned [8 x i16] vectors, saving each sum in the corresponding element /// of a 128-bit result vector of [8 x i16]. Positive sums greater than /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. /// /// \headerfile /// /// This intrinsic corresponds to the VPADDUSB / PADDUSB instruction. /// /// \param __a /// A 128-bit unsigned [8 x i16] vector. /// \param __b /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums /// of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); } /// Computes the rounded averages of corresponding elements of two /// 128-bit unsigned [16 x i8] vectors, saving each result in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPAVGB / PAVGB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded /// averages of both parameters. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); } /// Computes the rounded averages of corresponding elements of two /// 128-bit unsigned [8 x i16] vectors, saving each result in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPAVGW / PAVGW instruction. /// /// \param __a /// A 128-bit unsigned [8 x i16] vector. /// \param __b /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded /// averages of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] /// vectors, producing eight intermediate 32-bit signed integer products, and /// adds the consecutive pairs of 32-bit products to form a 128-bit signed /// [4 x i32] vector. /// /// For example, bits [15:0] of both parameters are multiplied producing a /// 32-bit product, bits [31:16] of both parameters are multiplied producing /// a 32-bit product, and the sum of those two products becomes bits [31:0] /// of the result. /// /// \headerfile /// /// This intrinsic corresponds to the VPMADDWD / PMADDWD instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [4 x i32] vector containing the sums of products /// of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] /// vectors, saving the greater value from each comparison in the /// corresponding element of a 128-bit result vector of [8 x i16]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPMAXSW / PMAXSW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the greater value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] /// vectors, saving the greater value from each comparison in the /// corresponding element of a 128-bit result vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXUB / PMAXUB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] /// vectors, saving the smaller value from each comparison in the /// corresponding element of a 128-bit result vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINSW / PMINSW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] /// vectors, saving the smaller value from each comparison in the /// corresponding element of a 128-bit result vector of [16 x i8]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPMINUB / PMINUB instruction. /// /// \param __a /// A 128-bit unsigned [16 x i8] vector. /// \param __b /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of /// each comparison. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] /// vectors, saving the upper 16 bits of each 32-bit product in the /// corresponding element of a 128-bit signed [8 x i16] result vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULHW / PMULHW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of /// each of the eight 32-bit products. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two unsigned [8 x i16] /// vectors, saving the upper 16 bits of each 32-bit product in the /// corresponding element of a 128-bit unsigned [8 x i16] result vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULHUW / PMULHUW instruction. /// /// \param __a /// A 128-bit unsigned [8 x i16] vector. /// \param __b /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits /// of each of the eight 32-bit products. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] /// vectors, saving the lower 16 bits of each 32-bit product in the /// corresponding element of a 128-bit signed [8 x i16] result vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULLW / PMULLW instruction. /// /// \param __a /// A 128-bit signed [8 x i16] vector. /// \param __b /// A 128-bit signed [8 x i16] vector. /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of /// each of the eight 32-bit products. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a * (__v8hu)__b); } /// Multiplies 32-bit unsigned integer values contained in the lower bits /// of the two 64-bit integer vectors and returns the 64-bit unsigned /// product. /// /// \headerfile /// /// This intrinsic corresponds to the PMULUDQ instruction. /// /// \param __a /// A 64-bit integer containing one of the source operands. /// \param __b /// A 64-bit integer containing one of the source operands. /// \returns A 64-bit integer vector containing the product of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, __m64 __b) { return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); } /// Multiplies 32-bit unsigned integer values contained in the lower /// bits of the corresponding elements of two [2 x i64] vectors, and returns /// the 64-bit products in the corresponding elements of a [2 x i64] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULUDQ / PMULUDQ instruction. /// /// \param __a /// A [2 x i64] vector containing one of the source operands. /// \param __b /// A [2 x i64] vector containing one of the source operands. /// \returns A [2 x i64] vector containing the product of both operands. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b) { return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); } /// Computes the absolute differences of corresponding 8-bit integer /// values in two 128-bit vectors. Sums the first 8 absolute differences, and /// separately sums the second 8 absolute differences. Packs these two /// unsigned 16-bit integer sums into the upper and lower elements of a /// [2 x i64] vector. /// /// \headerfile /// /// This intrinsic corresponds to the VPSADBW / PSADBW instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A [2 x i64] vector containing the sums of the sets of absolute /// differences between both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b) { return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); } /// Subtracts the corresponding 8-bit integer values in the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBB / PSUBB instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } /// Subtracts the corresponding 16-bit integer values in the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBW / PSUBW instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } /// Subtracts the corresponding 32-bit integer values in the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBD / PSUBD instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a - (__v4su)__b); } /// Subtracts signed or unsigned 64-bit integer values and writes the /// difference to the corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBQ instruction. /// /// \param __a /// A 64-bit integer vector containing the minuend. /// \param __b /// A 64-bit integer vector containing the subtrahend. /// \returns A 64-bit integer vector containing the difference of the values in /// the operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); } /// Subtracts the corresponding elements of two [2 x i64] vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBQ / PSUBQ instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a - (__v2du)__b); } /// Subtracts corresponding 8-bit signed integer values in the input and /// returns the differences in the corresponding bytes in the destination. 
/// Differences greater than 0x7F are saturated to 0x7F, and differences less /// than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBSB / PSUBSB instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); } /// Subtracts corresponding 16-bit signed integer values in the input and /// returns the differences in the corresponding bytes in the destination. /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less /// than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBSW / PSUBSW instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); } /// Subtracts corresponding 8-bit unsigned integer values in the input /// and returns the differences in the corresponding bytes in the /// destination. Differences less than 0x00 are saturated to 0x00. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBUSB / PSUBUSB instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the unsigned integer /// differences of the values in the operands. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); } /// Subtracts corresponding 16-bit unsigned integer values in the input /// and returns the differences in the corresponding bytes in the /// destination. Differences less than 0x0000 are saturated to 0x0000. /// /// \headerfile /// /// This intrinsic corresponds to the VPSUBUSW / PSUBUSW instruction. /// /// \param __a /// A 128-bit integer vector containing the minuends. /// \param __b /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the unsigned integer /// differences of the values in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); } /// Performs a bitwise AND of two 128-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPAND / PAND instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise AND of the values /// in both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a & (__v2du)__b); } /// Performs a bitwise AND of two 128-bit integer vectors, using the /// one's complement of the values contained in the first source operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPANDN / PANDN instruction. /// /// \param __a /// A 128-bit vector containing the left source operand. The one's complement /// of this value is used in the bitwise AND. /// \param __b /// A 128-bit vector containing the right source operand. 
/// \returns A 128-bit integer vector containing the bitwise AND of the one's /// complement of the first operand and the values in the second operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b) { return (__m128i)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPOR / POR instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise OR of the values /// in both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a | (__v2du)__b); } /// Performs a bitwise exclusive OR of two 128-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the VPXOR / PXOR instruction. /// /// \param __a /// A 128-bit integer vector containing one of the source operands. /// \param __b /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the /// values in both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a ^ (__v2du)__b); } /// Left-shifts the 128-bit integer vector operand by the specified /// number of bytes. Low-order bits are cleared. /// /// \headerfile /// /// \code /// __m128i _mm_slli_si128(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSLLDQ / PSLLDQ instruction. /// /// \param a /// A 128-bit integer vector containing the source operand. /// \param imm /// An immediate value specifying the number of bytes to left-shift operand /// \a a. /// \returns A 128-bit integer vector containing the left-shifted value. 
#define _mm_slli_si128(a, imm) \ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) #define _mm_bslli_si128(a, imm) \ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) /// Left-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLW / PSLLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); } /// Left-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLW / PSLLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); } /// Left-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLD / PSLLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. 
/// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); } /// Left-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLD / PSLLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); } /// Left-shifts each 64-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLQ / PSLLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to left-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count) { return __builtin_ia32_psllqi128((__v2di)__a, __count); } /// Left-shifts each 64-bit value in the 128-bit integer vector operand /// by the specified number of bits. Low-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSLLQ / PSLLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. 
/// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to left-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the left-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); } /// Right-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAW / PSRAW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); } /// Right-shifts each 16-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAW / PSRAW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); } /// Right-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPSRAD / PSRAD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); } /// Right-shifts each 32-bit value in the 128-bit integer vector operand /// by the specified number of bits. High-order bits are filled with the sign /// bit of the initial value. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRAD / PSRAD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); } /// Right-shifts the 128-bit integer vector operand by the specified /// number of bytes. High-order bits are cleared. /// /// \headerfile /// /// \code /// __m128i _mm_srli_si128(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSRLDQ / PSRLDQ instruction. /// /// \param a /// A 128-bit integer vector containing the source operand. /// \param imm /// An immediate value specifying the number of bytes to right-shift operand /// \a a. /// \returns A 128-bit integer vector containing the right-shifted value. 
#define _mm_srli_si128(a, imm) \ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) #define _mm_bsrli_si128(a, imm) \ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ (int)(imm))) /// Right-shifts each of 16-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLW / PSRLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); } /// Right-shifts each of 16-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLW / PSRLW instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); } /// Right-shifts each of 32-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLD / PSRLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. 
/// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count) { return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); } /// Right-shifts each of 32-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLD / PSRLD instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count) { return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); } /// Right-shifts each of 64-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLQ / PSRLQ instruction. /// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// An integer value specifying the number of bits to right-shift each value /// in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count) { return __builtin_ia32_psrlqi128((__v2di)__a, __count); } /// Right-shifts each of 64-bit values in the 128-bit integer vector /// operand by the specified number of bits. High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the VPSRLQ / PSRLQ instruction. 
/// /// \param __a /// A 128-bit integer vector containing the source operand. /// \param __count /// A 128-bit integer vector in which bits [63:0] specify the number of bits /// to right-shift each value in operand \a __a. /// \returns A 128-bit integer vector containing the right-shifted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count) { return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); } /// Compares each of the corresponding 8-bit values of the 128-bit /// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF /// for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQB / PCMPEQB instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qi)__a == (__v16qi)__b); } /// Compares each of the corresponding 16-bit values of the 128-bit /// integer vectors for equality. Each comparison yields 0x0 for false, /// 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQW / PCMPEQW instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a == (__v8hi)__b); } /// Compares each of the corresponding 32-bit values of the 128-bit /// integer vectors for equality. Each comparison yields 0x0 for false, /// 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQD / PCMPEQD instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a == (__v4si)__b); } /// Compares each of the corresponding signed 8-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are /// greater than those in the second operand. Each comparison yields 0x0 for /// false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTB / PCMPGTB instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b) { /* This function always performs a signed comparison, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i)((__v16qs)__a > (__v16qs)__b); } /// Compares each of the corresponding signed 16-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTW / PCMPGTW instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hi)__a > (__v8hi)__b); } /// Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are greater than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTD / PCMPGTD instruction. /// /// \param __a /// A 128-bit integer vector. 
/// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4si)__a > (__v4si)__b); } /// Compares each of the corresponding signed 8-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are less /// than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTB / PCMPGTB instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b) { return _mm_cmpgt_epi8(__b, __a); } /// Compares each of the corresponding signed 16-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTW / PCMPGTW instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b) { return _mm_cmpgt_epi16(__b, __a); } /// Compares each of the corresponding signed 32-bit values of the /// 128-bit integer vectors to determine if the values in the first operand /// are less than those in the second operand. /// /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTD / PCMPGTD instruction. /// /// \param __a /// A 128-bit integer vector. /// \param __b /// A 128-bit integer vector. 
/// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b) { return _mm_cmpgt_epi32(__b, __a); } #ifdef __x86_64__ /// Converts a 64-bit signed integer value from the second operand into a /// double-precision value and returns it in the lower element of a [2 x /// double] vector; the upper element of the returned vector is copied from /// the upper element of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SD / CVTSI2SD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are /// copied to the upper 64 bits of the destination. /// \param __b /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, long long __b) { __a[0] = __b; return __a; } /// Converts the first (lower) element of a vector of [2 x double] into a /// 64-bit signed integer value, according to the current rounding mode. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSD2SI / CVTSD2SI instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the /// conversion. /// \returns A 64-bit signed integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { return __builtin_ia32_cvtsd2si64((__v2df)__a); } /// Converts the first (lower) element of a vector of [2 x double] into a /// 64-bit signed integer value, truncating the result when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSD2SI / CVTTSD2SI /// instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. 
The lower 64 bits are used in the /// conversion. /// \returns A 64-bit signed integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { return __builtin_ia32_cvttsd2si64((__v2df)__a); } #endif /// Converts a vector of [4 x i32] into a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTDQ2PS / CVTDQ2PS instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit vector of [4 x float] containing the converted values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); } /// Converts a vector of [4 x float] into a vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPS2DQ / CVTPS2DQ instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit integer vector of [4 x i32] containing the converted /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); } /// Converts a vector of [4 x float] into a vector of [4 x i32], /// truncating the result when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTPS2DQ / CVTTPS2DQ /// instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x i32] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); } /// Returns a vector of [4 x i32] where the lowest element is the input /// operand and the remaining elements are zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __a /// A 32-bit signed integer operand. /// \returns A 128-bit vector of [4 x i32]. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; } /// Returns a vector of [2 x i64] where the lower element is the input /// operand and the upper element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction /// in 64-bit mode. /// /// \param __a /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [2 x i64] containing the converted value. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { return __extension__(__m128i)(__v2di){__a, 0}; } /// Moves the least significant 32 bits of a vector of [4 x i32] to a /// 32-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __a /// A vector of [4 x i32]. The least significant 32 bits are moved to the /// destination. /// \returns A 32-bit signed integer containing the moved value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { __v4si __b = (__v4si)__a; return __b[0]; } /// Moves the least significant 64 bits of a vector of [2 x i64] to a /// 64-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __a /// A vector of [2 x i64]. The least significant 64 bits are moved to the /// destination. /// \returns A 64-bit signed integer containing the moved value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { return __a[0]; } /// Moves packed integer values from an aligned 128-bit memory location /// to elements in a 128-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQA / MOVDQA instruction. /// /// \param __p /// An aligned pointer to a memory location containing integer values. /// \returns A 128-bit integer vector containing the moved values. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p) { return *__p; } /// Moves packed integer values from an unaligned 128-bit memory location /// to elements in a 128-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVDQU / MOVDQU instruction. /// /// \param __p /// A pointer to a memory location containing integer values. /// \returns A 128-bit integer vector containing the moved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p) { struct __loadu_si128 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_si128 *)__p)->__v; } /// Returns a vector of [2 x i64] where the lower element is taken from /// the lower element of the operand, and the upper element is zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __p /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of /// the destination. /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the /// moved value. The higher order bits are cleared. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i_u const *__p) { struct __mm_loadl_epi64_struct { long long __u; } __attribute__((__packed__, __may_alias__)); return __extension__(__m128i){ ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0}; } /// Generates a 128-bit vector of [4 x i32] with unspecified content. /// This could be used as an argument to another intrinsic function where the /// argument is required but the value is not actually used. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit vector of [4 x i32] with unspecified content. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { return (__m128i)__builtin_ia32_undef128(); } /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q1 /// A 64-bit integer value used to initialize the upper 64 bits of the /// destination vector of [2 x i64]. /// \param __q0 /// A 64-bit integer value used to initialize the lower 64 bits of the /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0) { return __extension__(__m128i)(__v2di){__q0, __q1}; } /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q1 /// A 64-bit integer value used to initialize the upper 64 bits of the /// destination vector of [2 x i64]. /// \param __q0 /// A 64-bit integer value used to initialize the lower 64 bits of the /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0) { return _mm_set_epi64x((long long)__q1, (long long)__q0); } /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with /// the specified 32-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i3 /// A 32-bit integer value used to initialize bits [127:96] of the /// destination vector. 
/// \param __i2 /// A 32-bit integer value used to initialize bits [95:64] of the destination /// vector. /// \param __i1 /// A 32-bit integer value used to initialize bits [63:32] of the destination /// vector. /// \param __i0 /// A 32-bit integer value used to initialize bits [31:0] of the destination /// vector. /// \returns An initialized 128-bit vector of [4 x i32] containing the values /// provided in the operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) { return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; } /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with /// the specified 16-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w7 /// A 16-bit integer value used to initialize bits [127:112] of the /// destination vector. /// \param __w6 /// A 16-bit integer value used to initialize bits [111:96] of the /// destination vector. /// \param __w5 /// A 16-bit integer value used to initialize bits [95:80] of the destination /// vector. /// \param __w4 /// A 16-bit integer value used to initialize bits [79:64] of the destination /// vector. /// \param __w3 /// A 16-bit integer value used to initialize bits [63:48] of the destination /// vector. /// \param __w2 /// A 16-bit integer value used to initialize bits [47:32] of the destination /// vector. /// \param __w1 /// A 16-bit integer value used to initialize bits [31:16] of the destination /// vector. /// \param __w0 /// A 16-bit integer value used to initialize bits [15:0] of the destination /// vector. /// \returns An initialized 128-bit vector of [8 x i16] containing the values /// provided in the operands. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) { return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7}; } /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with /// the specified 8-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b15 /// Initializes bits [127:120] of the destination vector. /// \param __b14 /// Initializes bits [119:112] of the destination vector. /// \param __b13 /// Initializes bits [111:104] of the destination vector. /// \param __b12 /// Initializes bits [103:96] of the destination vector. /// \param __b11 /// Initializes bits [95:88] of the destination vector. /// \param __b10 /// Initializes bits [87:80] of the destination vector. /// \param __b9 /// Initializes bits [79:72] of the destination vector. /// \param __b8 /// Initializes bits [71:64] of the destination vector. /// \param __b7 /// Initializes bits [63:56] of the destination vector. /// \param __b6 /// Initializes bits [55:48] of the destination vector. /// \param __b5 /// Initializes bits [47:40] of the destination vector. /// \param __b4 /// Initializes bits [39:32] of the destination vector. /// \param __b3 /// Initializes bits [31:24] of the destination vector. /// \param __b2 /// Initializes bits [23:16] of the destination vector. /// \param __b1 /// Initializes bits [15:8] of the destination vector. /// \param __b0 /// Initializes bits [7:0] of the destination vector. /// \returns An initialized 128-bit vector of [16 x i8] containing the values /// provided in the operands. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { return __extension__(__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15}; } /// Initializes both values in a 128-bit integer vector with the /// specified 64-bit integer value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q /// Integer value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit integer vector of [2 x i64] with both /// elements containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { return _mm_set_epi64x(__q, __q); } /// Initializes both values in a 128-bit vector of [2 x i64] with the /// specified 64-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __q /// A 64-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [2 x i64] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { return _mm_set_epi64(__q, __q); } /// Initializes all values in a 128-bit vector of [4 x i32] with the /// specified 32-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i /// A 32-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [4 x i32] with all elements /// containing the value provided in the operand. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { return _mm_set_epi32(__i, __i, __i, __i); } /// Initializes all values in a 128-bit vector of [8 x i16] with the /// specified 16-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w /// A 16-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [8 x i16] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); } /// Initializes all values in a 128-bit vector of [16 x i8] with the /// specified 8-bit value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b /// An 8-bit value used to initialize the elements of the destination integer /// vector. /// \returns An initialized 128-bit vector of [16 x i8] with all elements /// containing the value provided in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __q0 /// A 64-bit integral value used to initialize the lower 64 bits of the /// result. /// \param __q1 /// A 64-bit integral value used to initialize the upper 64 bits of the /// result. /// \returns An initialized 128-bit integer vector. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1) { return _mm_set_epi64(__q1, __q0); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 32-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integral value used to initialize bits [31:0] of the result. /// \param __i1 /// A 32-bit integral value used to initialize bits [63:32] of the result. /// \param __i2 /// A 32-bit integral value used to initialize bits [95:64] of the result. /// \param __i3 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \returns An initialized 128-bit integer vector. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) { return _mm_set_epi32(__i3, __i2, __i1, __i0); } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 16-bit integral values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w0 /// A 16-bit integral value used to initialize bits [15:0] of the result. /// \param __w1 /// A 16-bit integral value used to initialize bits [31:16] of the result. /// \param __w2 /// A 16-bit integral value used to initialize bits [47:32] of the result. /// \param __w3 /// A 16-bit integral value used to initialize bits [63:48] of the result. /// \param __w4 /// A 16-bit integral value used to initialize bits [79:64] of the result. /// \param __w5 /// A 16-bit integral value used to initialize bits [95:80] of the result. /// \param __w6 /// A 16-bit integral value used to initialize bits [111:96] of the result. /// \param __w7 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \returns An initialized 128-bit integer vector. 
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
               short __w5, short __w6, short __w7) {
  // The operands are forwarded to _mm_set_epi16 in the opposite order.
  const __m128i __reversed =
      _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
  return __reversed;
}

/// Constructs a 128-bit integer vector, initialized in reverse order
///    with the specified 8-bit integral values.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic is a utility function and does not correspond to a specific
///    instruction.
///
/// \param __b0
///    An 8-bit integral value used to initialize bits [7:0] of the result.
/// \param __b1
///    An 8-bit integral value used to initialize bits [15:8] of the result.
/// \param __b2
///    An 8-bit integral value used to initialize bits [23:16] of the result.
/// \param __b3
///    An 8-bit integral value used to initialize bits [31:24] of the result.
/// \param __b4
///    An 8-bit integral value used to initialize bits [39:32] of the result.
/// \param __b5
///    An 8-bit integral value used to initialize bits [47:40] of the result.
/// \param __b6
///    An 8-bit integral value used to initialize bits [55:48] of the result.
/// \param __b7
///    An 8-bit integral value used to initialize bits [63:56] of the result.
/// \param __b8
///    An 8-bit integral value used to initialize bits [71:64] of the result.
/// \param __b9
///    An 8-bit integral value used to initialize bits [79:72] of the result.
/// \param __b10
///    An 8-bit integral value used to initialize bits [87:80] of the result.
/// \param __b11
///    An 8-bit integral value used to initialize bits [95:88] of the result.
/// \param __b12
///    An 8-bit integral value used to initialize bits [103:96] of the result.
/// \param __b13
///    An 8-bit integral value used to initialize bits [111:104] of the result.
/// \param __b14
///    An 8-bit integral value used to initialize bits [119:112] of the result.
/// \param __b15
///    An 8-bit integral value used to initialize bits [127:120] of the result.
/// \returns An initialized 128-bit integer vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) { return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } /// Creates a 128-bit integer vector initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instruction. /// /// \returns An initialized 128-bit integer vector with all elements set to /// zero. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { return __extension__(__m128i)(__v2di){0LL, 0LL}; } /// Stores a 128-bit integer vector to a memory location aligned on a /// 128-bit boundary. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS instruction. /// /// \param __p /// A pointer to an aligned memory location that will receive the integer /// values. /// \param __b /// A 128-bit integer vector containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b) { *__p = __b; } /// Stores a 128-bit integer vector to an unaligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS / MOVUPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the integer values. /// \param __b /// A 128-bit integer vector containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, __m128i __b) { struct __storeu_si128 { __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si128 *)__p)->__v = __b; } /// Stores a 64-bit integer value from the low element of a 128-bit integer /// vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __p /// A pointer to a 64-bit memory location. 
The address of the memory /// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, __m128i __b) { struct __storeu_si64 { long long __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0]; } /// Stores a 32-bit integer value from the low element of a 128-bit integer /// vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVD / MOVD instruction. /// /// \param __p /// A pointer to a 32-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, __m128i __b) { struct __storeu_si32 { int __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0]; } /// Stores a 16-bit integer value from the low element of a 128-bit integer /// vector. /// /// \headerfile /// /// This intrinsic does not correspond to a specific instruction. /// /// \param __p /// A pointer to a 16-bit memory location. The address of the memory /// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, __m128i __b) { struct __storeu_si16 { short __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0]; } /// Moves bytes selected by the mask from the first operand to the /// specified unaligned memory location. When a mask bit is 1, the /// corresponding byte is written, otherwise it is not written. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). Exception and trap behavior for elements not selected /// for storage to memory are implementation dependent. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMASKMOVDQU / MASKMOVDQU /// instruction. /// /// \param __d /// A 128-bit integer vector containing the values to be moved. /// \param __n /// A 128-bit integer vector containing the mask. The most significant bit of /// each byte represents the mask bits. /// \param __p /// A pointer to an unaligned 128-bit memory location where the specified /// values are moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) { __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); } /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to /// a memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPS / MOVLPS instruction. /// /// \param __p /// A pointer to a 64-bit memory location that will receive the lower 64 bits /// of the integer vector parameter. /// \param __a /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the /// value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, __m128i __a) { struct __mm_storel_epi64_struct { long long __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0]; } /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit /// aligned memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS / MOVNTPS instruction. /// /// \param __p /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A vector of [2 x double] containing the 64-bit values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 
/// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS / MOVNTPS instruction. /// /// \param __p /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A 128-bit integer vector containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } /// Stores a 32-bit integer value in the specified memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTI instruction. /// /// \param __p /// A pointer to the 32-bit memory location used to store the value. /// \param __a /// A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si32(int *__p, int __a) { __builtin_ia32_movnti(__p, __a); } #ifdef __x86_64__ /// Stores a 64-bit integer value in the specified memory location. /// /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTIQ instruction. /// /// \param __p /// A pointer to the 64-bit memory location used to store the value. /// \param __a /// A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si64(long long *__p, long long __a) { __builtin_ia32_movnti64(__p, __a); } #endif #if defined(__cplusplus) extern "C" { #endif /// The cache line containing \a __p is flushed and invalidated from all /// caches in the coherency domain. /// /// \headerfile /// /// This intrinsic corresponds to the CLFLUSH instruction. 
/// /// \param __p /// A pointer to the memory location used to identify the cache line to be /// flushed. void _mm_clflush(void const *__p); /// Forces strong memory ordering (serialization) between load /// instructions preceding this instruction and load instructions following /// this instruction, ensuring the system completes all previous loads before /// executing subsequent loads. /// /// \headerfile /// /// This intrinsic corresponds to the LFENCE instruction. /// void _mm_lfence(void); /// Forces strong memory ordering (serialization) between load and store /// instructions preceding this instruction and load and store instructions /// following this instruction, ensuring that the system completes all /// previous memory accesses before executing subsequent memory accesses. /// /// \headerfile /// /// This intrinsic corresponds to the MFENCE instruction. /// void _mm_mfence(void); #if defined(__cplusplus) } // extern "C" #endif /// Converts 16-bit signed integers from both 128-bit integer vector /// operands into 8-bit signed integers, and packs the results into the /// destination. Positive values greater than 0x7F are saturated to 0x7F. /// Negative values less than 0x80 are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKSSWB / PACKSSWB instruction. /// /// \param __a /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as /// a signed integer and is converted to a 8-bit signed integer with /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are /// written to the lower 64 bits of the result. /// \param __b /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as /// a signed integer and is converted to a 8-bit signed integer with /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less /// than 0x80 are saturated to 0x80. 
The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); } /// Converts 32-bit signed integers from both 128-bit integer vector /// operands into 16-bit signed integers, and packs the results into the /// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF. /// Negative values less than 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKSSDW / PACKSSDW instruction. /// /// \param __a /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as /// a signed integer and is converted to a 16-bit signed integer with /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values /// are written to the lower 64 bits of the result. /// \param __b /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as /// a signed integer and is converted to a 16-bit signed integer with /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values /// are written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); } /// Converts 16-bit signed integers from both 128-bit integer vector /// operands into 8-bit unsigned integers, and packs the results into the /// destination. Values greater than 0xFF are saturated to 0xFF. Values less /// than 0x00 are saturated to 0x00. 
/// /// \headerfile /// /// This intrinsic corresponds to the VPACKUSWB / PACKUSWB instruction. /// /// \param __a /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as /// a signed integer and is converted to an 8-bit unsigned integer with /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are /// written to the lower 64 bits of the result. /// \param __b /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as /// a signed integer and is converted to an 8-bit unsigned integer with /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are /// written to the higher 64 bits of the result. /// \returns A 128-bit vector of [16 x i8] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); } /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using /// the immediate-value parameter as a selector. /// /// \headerfile /// /// \code /// __m128i _mm_extract_epi16(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPEXTRW / PEXTRW instruction. /// /// \param a /// A 128-bit integer vector. /// \param imm /// An immediate value. Bits [2:0] selects values from \a a to be assigned /// to bits[15:0] of the result. \n /// 000: assign values from bits [15:0] of \a a. \n /// 001: assign values from bits [31:16] of \a a. \n /// 010: assign values from bits [47:32] of \a a. \n /// 011: assign values from bits [63:48] of \a a. \n /// 100: assign values from bits [79:64] of \a a. \n /// 101: assign values from bits [95:80] of \a a. \n /// 110: assign values from bits [111:96] of \a a. \n /// 111: assign values from bits [127:112] of \a a. 
/// \returns An integer, whose lower 16 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. #define _mm_extract_epi16(a, imm) \ ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \ (int)(imm))) /// Constructs a 128-bit integer vector by first making a copy of the /// 128-bit integer vector parameter, and then inserting the lower 16 bits /// of an integer parameter into an offset specified by the immediate-value /// parameter. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi16(__m128i a, int b, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPINSRW / PINSRW instruction. /// /// \param a /// A 128-bit integer vector of [8 x i16]. This vector is copied to the /// result and then one of the eight elements in the result is replaced by /// the lower 16 bits of \a b. /// \param b /// An integer. The lower 16 bits of this parameter are written to the /// result beginning at an offset specified by \a imm. /// \param imm /// An immediate value specifying the bit offset in the result at which the /// lower 16 bits of \a b are written. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi16(a, b, imm) \ ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ (int)(imm))) /// Copies the values of the most significant bits from each 8-bit /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask /// value, zero-extends the value, and writes it to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVMSKB / PMOVMSKB instruction. /// /// \param __a /// A 128-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bits from each 8-bit element in \a __a, /// written to bits [15:0]. The other bits are assigned zeros. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
  __v16qi __bytes = (__v16qi)__a;
  return __builtin_ia32_pmovmskb128(__bytes);
}

/// Builds a 128-bit integer vector by shuffling the four 32-bit elements
///    of a 128-bit integer vector parameter, using the immediate-value
///    parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the VPSHUFD / PSHUFD instruction.
///
/// \param a
///    A 128-bit integer vector containing the values to be copied.
/// \param imm
///    An immediate value containing an 8-bit value specifying which elements to
///    copy from a. The destinations within the 128-bit destination are assigned
///    values as follows: \n
///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [31:0] of \a a. \n
///    01: assign values from bits [63:32] of \a a. \n
///    10: assign values from bits [95:64] of \a a. \n
///    11: assign values from bits [127:96] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm)                                              \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))

/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
///    elements of a 128-bit integer vector of [8 x i16], using the immediate
///    value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the VPSHUFLW / PSHUFLW instruction.
/// /// \param a /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits /// [127:64] of the result. /// \param imm /// An 8-bit immediate value specifying which elements to copy from \a a. \n /// Bits[1:0] are used to assign values to bits [15:0] of the result. \n /// Bits[3:2] are used to assign values to bits [31:16] of the result. \n /// Bits[5:4] are used to assign values to bits [47:32] of the result. \n /// Bits[7:6] are used to assign values to bits [63:48] of the result. \n /// Bit value assignments: \n /// 00: assign values from bits [15:0] of \a a. \n /// 01: assign values from bits [31:16] of \a a. \n /// 10: assign values from bits [47:32] of \a a. \n /// 11: assign values from bits [63:48] of \a a. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflelo_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) /// Constructs a 128-bit integer vector by shuffling four upper 16-bit /// elements of a 128-bit integer vector of [8 x i16], using the immediate /// value parameter as a specifier. /// /// \headerfile /// /// \code /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VPSHUFHW / PSHUFHW instruction. /// /// \param a /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits /// [63:0] of the result. /// \param imm /// An 8-bit immediate value specifying which elements to copy from \a a. \n /// Bits[1:0] are used to assign values to bits [79:64] of the result. \n /// Bits[3:2] are used to assign values to bits [95:80] of the result. \n /// Bits[5:4] are used to assign values to bits [111:96] of the result. \n /// Bits[7:6] are used to assign values to bits [127:112] of the result. 
///    \n
///    Bit value assignments: \n
///    00: assign values from bits [79:64] of \a a. \n
///    01: assign values from bits [95:80] of \a a. \n
///    10: assign values from bits [111:96] of \a a. \n
///    11: assign values from bits [127:112] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form
///    [b6, b4, b2, b0].
/// \returns A 128-bit integer vector containing the shuffled values.
// Both arguments are parenthesized and cast before reaching the builtin.
#define _mm_shufflehi_epi16(a, imm)                                            \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))

/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the VPUNPCKHBW / PUNPCKHBW
///    instruction.
///
/// \param __a
///    A 128-bit vector of [16 x i8].
///    Bits [71:64] are written to bits [7:0] of the result. \n
///    Bits [79:72] are written to bits [23:16] of the result. \n
///    Bits [87:80] are written to bits [39:32] of the result. \n
///    Bits [95:88] are written to bits [55:48] of the result. \n
///    Bits [103:96] are written to bits [71:64] of the result. \n
///    Bits [111:104] are written to bits [87:80] of the result. \n
///    Bits [119:112] are written to bits [103:96] of the result. \n
///    Bits [127:120] are written to bits [119:112] of the result.
/// \param __b
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [15:8] of the result. \n
///    Bits [79:72] are written to bits [31:24] of the result. \n
///    Bits [87:80] are written to bits [47:40] of the result. \n
///    Bits [95:88] are written to bits [63:56] of the result. \n
///    Bits [103:96] are written to bits [79:72] of the result. \n
///    Bits [111:104] are written to bits [95:88] of the result. \n
///    Bits [119:112] are written to bits [111:104] of the result. \n
///    Bits [127:120] are written to bits [127:120] of the result.
/// \returns A 128-bit vector of [16 x i8] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector( (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); } /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHWD / PUNPCKHWD /// instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// Bits [79:64] are written to bits [15:0] of the result. \n /// Bits [95:80] are written to bits [47:32] of the result. \n /// Bits [111:96] are written to bits [79:64] of the result. \n /// Bits [127:112] are written to bits [111:96] of the result. /// \param __b /// A 128-bit vector of [8 x i16]. /// Bits [79:64] are written to bits [31:16] of the result. \n /// Bits [95:80] are written to bits [63:48] of the result. \n /// Bits [111:96] are written to bits [95:80] of the result. \n /// Bits [127:112] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, 8 + 5, 6, 8 + 6, 7, 8 + 7); } /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHDQ / PUNPCKHDQ /// instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. \n /// Bits [95:64] are written to bits [31:0] of the destination. \n /// Bits [127:96] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x i32]. 
\n /// Bits [95:64] are written to bits [64:32] of the destination. \n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, 4 + 3); } /// Unpacks the high-order 64-bit elements from two 128-bit vectors of /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKHQDQ / PUNPCKHQDQ /// instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. \n /// Bits [127:64] are written to bits [63:0] of the destination. /// \param __b /// A 128-bit vector of [2 x i64]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); } /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLBW / PUNPCKLBW /// instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. \n /// Bits [7:0] are written to bits [7:0] of the result. \n /// Bits [15:8] are written to bits [23:16] of the result. \n /// Bits [23:16] are written to bits [39:32] of the result. \n /// Bits [31:24] are written to bits [55:48] of the result. \n /// Bits [39:32] are written to bits [71:64] of the result. \n /// Bits [47:40] are written to bits [87:80] of the result. \n /// Bits [55:48] are written to bits [103:96] of the result. \n /// Bits [63:56] are written to bits [119:112] of the result. /// \param __b /// A 128-bit vector of [16 x i8]. 
/// Bits [7:0] are written to bits [15:8] of the result. \n /// Bits [15:8] are written to bits [31:24] of the result. \n /// Bits [23:16] are written to bits [47:40] of the result. \n /// Bits [31:24] are written to bits [63:56] of the result. \n /// Bits [39:32] are written to bits [79:72] of the result. \n /// Bits [47:40] are written to bits [95:88] of the result. \n /// Bits [55:48] are written to bits [111:104] of the result. \n /// Bits [63:56] are written to bits [127:120] of the result. /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector( (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); } /// Unpacks the low-order (index 0-3) values from each of the two 128-bit /// vectors of [8 x i16] and interleaves them into a 128-bit vector of /// [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLWD / PUNPCKLWD /// instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// Bits [15:0] are written to bits [15:0] of the result. \n /// Bits [31:16] are written to bits [47:32] of the result. \n /// Bits [47:32] are written to bits [79:64] of the result. \n /// Bits [63:48] are written to bits [111:96] of the result. /// \param __b /// A 128-bit vector of [8 x i16]. /// Bits [15:0] are written to bits [31:16] of the result. \n /// Bits [31:16] are written to bits [63:48] of the result. \n /// Bits [47:32] are written to bits [95:80] of the result. \n /// Bits [63:48] are written to bits [127:112] of the result. /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, 8 + 1, 2, 8 + 2, 3, 8 + 3); } /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLDQ / PUNPCKLDQ /// instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. \n /// Bits [31:0] are written to bits [31:0] of the destination. \n /// Bits [63:32] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x i32]. \n /// Bits [31:0] are written to bits [64:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, 4 + 1); } /// Unpacks the low-order 64-bit elements from two 128-bit vectors of /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. /// /// \headerfile /// /// This intrinsic corresponds to the VPUNPCKLQDQ / PUNPCKLQDQ /// instruction. /// /// \param __a /// A 128-bit vector of [2 x i64]. \n /// Bits [63:0] are written to bits [63:0] of the destination. \n /// \param __b /// A 128-bit vector of [2 x i64]. \n /// Bits [63:0] are written to bits [127:64] of the destination. \n /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b) { return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); } /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit /// integer. /// /// \headerfile /// /// This intrinsic corresponds to the MOVDQ2Q instruction. 
/// /// \param __a /// A 128-bit integer vector operand. The lower 64 bits are moved to the /// destination. /// \returns A 64-bit integer containing the lower 64 bits of the parameter. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { return (__m64)__a[0]; } /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the /// upper bits. /// /// \headerfile /// /// This intrinsic corresponds to the MOVD+VMOVQ instruction. /// /// \param __a /// A 64-bit value. /// \returns A 128-bit integer vector. The lower 64 bits contain the value from /// the operand. The upper 64 bits are assigned zeros. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { return __extension__(__m128i)(__v2di){(long long)__a, 0}; } /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit /// integer vector, zeroing the upper bits. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVQ / MOVQ instruction. /// /// \param __a /// A 128-bit integer vector operand. The lower 64 bits are moved to the /// destination. /// \returns A 128-bit integer vector. The lower 64 bits contain the value from /// the operand. The upper 64 bits are assigned zeros. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); } /// Unpacks the high-order 64-bit elements from two 128-bit vectors of /// [2 x double] and interleaves them into a 128-bit vector of [2 x /// double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPD / UNPCKHPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [63:0] of the destination. /// \param __b /// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); } /// Unpacks the low-order 64-bit elements from two 128-bit vectors /// of [2 x double] and interleaves them into a 128-bit vector of [2 x /// double]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [63:0] of the destination. /// \param __b /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); } /// Extracts the sign bits of the double-precision values in the 128-bit /// vector of [2 x double], zero-extends the value, and writes it to the /// low-order bits of the destination. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPD / MOVMSKPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the values with sign bits to /// be extracted. /// \returns The sign bits from each of the double-precision elements in \a __a, /// written to bits [1:0]. The remaining bits are assigned values of zero. static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd((__v2df)__a); } /// Constructs a 128-bit floating-point vector of [2 x double] from two /// 128-bit vector parameters of [2 x double], using the immediate-value /// parameter as a specifier. /// /// \headerfile /// /// \code /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); /// \endcode /// /// This intrinsic corresponds to the VSHUFPD / SHUFPD instruction. /// /// \param a /// A 128-bit vector of [2 x double]. 
/// \param b /// A 128-bit vector of [2 x double]. /// \param i /// An 8-bit immediate value. The least significant two bits specify which /// elements to copy from \a a and \a b: \n /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. /// _MM_SHUFFLE2(b1, b0) can create a 2-bit mask of the form /// [b1, b0]. /// \returns A 128-bit vector of [2 x double] containing the shuffled values. #define _mm_shuffle_pd(a, b, i) \ ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ (int)(i))) /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [2 x double]. /// \returns A 128-bit floating-point vector of [4 x float] containing the same /// bitwise pattern as the parameter. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { return (__m128)__a; } /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [2 x double]. /// \returns A 128-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { return (__m128i)__a; } /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// floating-point vector of [2 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. 
/// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 128-bit floating-point vector of [2 x double] containing the same /// bitwise pattern as the parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { return (__m128d)__a; } /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit /// integer vector. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 128-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { return (__m128i)__a; } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit floating-point vector of [4 x float] containing the same /// bitwise pattern as the parameter. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { return (__m128)__a; } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of [2 x double]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit floating-point vector of [2 x double] containing the same /// bitwise pattern as the parameter. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { return (__m128d)__a; } #if defined(__cplusplus) extern "C" { #endif /// Indicates that a spin loop is being executed for the purposes of /// optimizing power consumption during the loop. /// /// \headerfile /// /// This intrinsic corresponds to the PAUSE instruction. 
/// void _mm_pause(void); #if defined(__cplusplus) } // extern "C" #endif #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_MMX #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) #define _MM_DENORMALS_ZERO_ON (0x0040U) #define _MM_DENORMALS_ZERO_OFF (0x0000U) #define _MM_DENORMALS_ZERO_MASK (0x0040U) #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) #define _MM_SET_DENORMALS_ZERO_MODE(x) \ (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) #endif /* __EMMINTRIN_H */ /*===------------------ enqcmdintrin.h - enqcmd intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __ENQCMDINTRIN_H #define __ENQCMDINTRIN_H /* Define the default attributes for the functions in this file */ #define _DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("enqcmd"))) /// Reads 64-byte command pointed by \a __src, formats 64-byte enqueue store /// data, and performs 64-byte enqueue store to memory pointed by \a __dst. /// This intrinsics may only be used in User mode. /// /// \headerfile /// /// This intrinsics corresponds to the ENQCMD instruction. /// /// \param __dst /// Pointer to the destination of the enqueue store. /// \param __src /// Pointer to 64-byte command data. /// \returns If the command data is successfully written to \a __dst then 0 is /// returned. Otherwise 1 is returned. 
static __inline__ int _DEFAULT_FN_ATTRS _enqcmd (void *__dst, const void *__src) { return __builtin_ia32_enqcmd(__dst, __src); } /// Reads 64-byte command pointed by \a __src, formats 64-byte enqueue store /// data, and performs 64-byte enqueue store to memory pointed by \a __dst /// This intrinsic may only be used in Privileged mode. /// /// \headerfile /// /// This intrinsics corresponds to the ENQCMDS instruction. /// /// \param __dst /// Pointer to the destination of the enqueue store. /// \param __src /// Pointer to 64-byte command data. /// \returns If the command data is successfully written to \a __dst then 0 is /// returned. Otherwise 1 is returned. static __inline__ int _DEFAULT_FN_ATTRS _enqcmds (void *__dst, const void *__src) { return __builtin_ia32_enqcmds(__dst, __src); } #undef _DEFAULT_FN_ATTRS #endif /* __ENQCMDINTRIN_H */ /*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __F16CINTRIN_H #define __F16CINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, * but that's because icc can emulate these without f16c using a library call. * Since we don't do that let's leave these in f16cintrin.h. */ /// Converts a 16-bit half-precision float value into a 32-bit float /// value. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCVTPH2PS instruction. /// /// \param __a /// A 16-bit half-precision float value. /// \returns The converted 32-bit float value. static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; __v4sf __r = __builtin_ia32_vcvtph2ps(__v); return __r[0]; } /// Converts a 32-bit single-precision float value to a 16-bit /// half-precision float value. /// /// \headerfile /// /// \code /// unsigned short _cvtss_sh(float a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VCVTPS2PH instruction. /// /// \param a /// A 32-bit single-precision float value to be converted to a 16-bit /// half-precision float value. /// \param imm /// An immediate value controlling rounding using bits [2:0]: \n /// 000: Nearest \n /// 001: Down \n /// 010: Up \n /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns The converted 16-bit half-precision float value. #define _cvtss_sh(a, imm) __extension__ ({ \ (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ (imm)))[0]); }) /// Converts a 128-bit vector containing 32-bit float values into a /// 128-bit vector containing 16-bit half-precision float values. /// /// \headerfile /// /// \code /// __m128i _mm_cvtps_ph(__m128 a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VCVTPS2PH instruction. /// /// \param a /// A 128-bit vector containing 32-bit float values. /// \param imm /// An immediate value controlling rounding using bits [2:0]: \n /// 000: Nearest \n /// 001: Down \n /// 010: Up \n /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing converted 16-bit half-precision float /// values. The lower 64 bits are used to store the converted 16-bit /// half-precision floating-point values. 
#define _mm_cvtps_ph(a, imm) \ ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 128-bit vector containing 32-bit float values. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPH2PS instruction. /// /// \param __a /// A 128-bit vector containing 16-bit half-precision float values. The lower /// 64 bits are used in the conversion. /// \returns A 128-bit vector of [4 x float] containing converted float values. static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) { return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); } /// Converts a 256-bit vector of [8 x float] into a 128-bit vector /// containing 16-bit half-precision float values. /// /// \headerfile /// /// \code /// __m128i _mm256_cvtps_ph(__m256 a, const int imm); /// \endcode /// /// This intrinsic corresponds to the VCVTPS2PH instruction. /// /// \param a /// A 256-bit vector containing 32-bit single-precision float values to be /// converted to 16-bit half-precision float values. /// \param imm /// An immediate value controlling rounding using bits [2:0]: \n /// 000: Nearest \n /// 001: Down \n /// 010: Up \n /// 011: Truncate \n /// 1XX: Use MXCSR.RC for rounding /// \returns A 128-bit vector containing the converted 16-bit half-precision /// float values. #define _mm256_cvtps_ph(a, imm) \ ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 256-bit vector of [8 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTPH2PS instruction. /// /// \param __a /// A 128-bit vector containing 16-bit half-precision float values to be /// converted to 32-bit single-precision float values. /// \returns A vector of [8 x float] containing the converted 32-bit /// single-precision float values. 
static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) { return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __F16CINTRIN_H */ /*===---- float.h - Characteristics of floating point types ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_FLOAT_H #define __CLANG_FLOAT_H /* If we're on MinGW, fall back to the system's float.h, which might have * additional definitions provided for Windows. * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx * * Also fall back on Darwin and AIX to allow additional definitions and * implementation-defined values. */ #if (defined(__APPLE__) || defined(__MINGW32__) || defined(_MSC_VER) || \ defined(_AIX)) && \ __STDC_HOSTED__ && __has_include_next() /* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level * of #include_next to keep Metrowerks compilers happy. Avoid this * extra indirection. */ #ifdef __APPLE__ #define _FLOAT_H_ #endif # include_next /* Undefine anything that we'll be redefining below. 
*/ # undef FLT_EVAL_METHOD # undef FLT_ROUNDS # undef FLT_RADIX # undef FLT_MANT_DIG # undef DBL_MANT_DIG # undef LDBL_MANT_DIG #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # undef DECIMAL_DIG # endif # undef FLT_DIG # undef DBL_DIG # undef LDBL_DIG # undef FLT_MIN_EXP # undef DBL_MIN_EXP # undef LDBL_MIN_EXP # undef FLT_MIN_10_EXP # undef DBL_MIN_10_EXP # undef LDBL_MIN_10_EXP # undef FLT_MAX_EXP # undef DBL_MAX_EXP # undef LDBL_MAX_EXP # undef FLT_MAX_10_EXP # undef DBL_MAX_10_EXP # undef LDBL_MAX_10_EXP # undef FLT_MAX # undef DBL_MAX # undef LDBL_MAX # undef FLT_EPSILON # undef DBL_EPSILON # undef LDBL_EPSILON # undef FLT_MIN # undef DBL_MIN # undef LDBL_MIN #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201703L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # undef FLT_TRUE_MIN # undef DBL_TRUE_MIN # undef LDBL_TRUE_MIN # undef FLT_DECIMAL_DIG # undef DBL_DECIMAL_DIG # undef LDBL_DECIMAL_DIG # undef FLT_HAS_SUBNORM # undef DBL_HAS_SUBNORM # undef LDBL_HAS_SUBNORM # endif #endif /* Characteristics of floating point types, C99 5.2.4.2.2 */ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ #endif #define FLT_ROUNDS (__builtin_flt_rounds()) #define FLT_RADIX __FLT_RADIX__ #define FLT_MANT_DIG __FLT_MANT_DIG__ #define DBL_MANT_DIG __DBL_MANT_DIG__ #define LDBL_MANT_DIG __LDBL_MANT_DIG__ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # define DECIMAL_DIG __DECIMAL_DIG__ #endif #define FLT_DIG __FLT_DIG__ #define DBL_DIG 
__DBL_DIG__ #define LDBL_DIG __LDBL_DIG__ #define FLT_MIN_EXP __FLT_MIN_EXP__ #define DBL_MIN_EXP __DBL_MIN_EXP__ #define LDBL_MIN_EXP __LDBL_MIN_EXP__ #define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ #define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ #define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ #define FLT_MAX_EXP __FLT_MAX_EXP__ #define DBL_MAX_EXP __DBL_MAX_EXP__ #define LDBL_MAX_EXP __LDBL_MAX_EXP__ #define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ #define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ #define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ #define FLT_MAX __FLT_MAX__ #define DBL_MAX __DBL_MAX__ #define LDBL_MAX __LDBL_MAX__ #define FLT_EPSILON __FLT_EPSILON__ #define DBL_EPSILON __DBL_EPSILON__ #define LDBL_EPSILON __LDBL_EPSILON__ #define FLT_MIN __FLT_MIN__ #define DBL_MIN __DBL_MIN__ #define LDBL_MIN __LDBL_MIN__ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ !defined(__STRICT_ANSI__) || \ (defined(__cplusplus) && __cplusplus >= 201703L) || \ (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE)) # define FLT_TRUE_MIN __FLT_DENORM_MIN__ # define DBL_TRUE_MIN __DBL_DENORM_MIN__ # define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ # define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ # define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ # define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ # define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ # define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ # define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ #endif #ifdef __STDC_WANT_IEC_60559_TYPES_EXT__ # define FLT16_MANT_DIG __FLT16_MANT_DIG__ # define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__ # define FLT16_DIG __FLT16_DIG__ # define FLT16_MIN_EXP __FLT16_MIN_EXP__ # define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__ # define FLT16_MAX_EXP __FLT16_MAX_EXP__ # define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__ # define FLT16_MAX __FLT16_MAX__ # define FLT16_EPSILON __FLT16_EPSILON__ # define FLT16_MIN __FLT16_MIN__ # define FLT16_TRUE_MIN __FLT16_TRUE_MIN__ #endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */ #endif /* __CLANG_FLOAT_H */ /*===---- 
fma4intrin.h - FMA4 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __FMA4INTRIN_H #define __FMA4INTRIN_H #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { return 
(__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maddsub_pd(__m128d __A, __m128d __B, 
__m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } static __inline__ 
__m256 __DEFAULT_FN_ATTRS256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __FMA4INTRIN_H */ /*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __FMAINTRIN_H #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) /// Computes a multiply-add of 128-bit vectors of [4 x float]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. 
/// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a multiply-add of 128-bit vectors of [2 x double]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit [2 x double] vector containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a scalar multiply-add of the single-precision values in the /// low 32 bits of 128-bit vectors of [4 x float]. /// \code /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the addend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits and a copy of \a __A[127:32] in the upper 96 bits. 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a scalar multiply-add of the double-precision values in the /// low 64 bits of 128-bit vectors of [2 x double]. /// \code /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the addend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a multiply-subtract of 128-bit vectors of [4 x float]. /// For each element, computes (__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. /// For each element, computes (__A * __B) - __C . 
/// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a scalar multiply-subtract of the single-precision values in /// the low 32 bits of 128-bit vectors of [4 x float]. /// \code /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a scalar multiply-subtract of the double-precision values in /// the low 64 bits of 128-bit vectors of [2 x double]. /// \code /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. 
/// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a negated multiply-add of 128-bit vectors of [4 x float]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213DPS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend. /// \returns A 128-bit [4 x float] vector containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend. /// \returns A 128-bit vector of [2 x double] containing the result. 
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a scalar negated multiply-add of the single-precision values in /// the low 32 bits of 128-bit vectors of [4 x float]. /// \code /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213SS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the addend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } /// Computes a scalar negated multiply-add of the double-precision values /// in the low 64 bits of 128-bit vectors of [2 x double]. /// \code /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the addend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. 
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a scalar negated multiply-subtract of the single-precision /// values in the low 32 bits of 128-bit vectors of [4 x float]. /// \code /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0] /// result[127:32] = __A[127:32] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213SS instruction. 
/// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand in the low /// 32 bits. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier in the low /// 32 bits. /// \param __C /// A 128-bit vector of [4 x float] containing the subtrahend in the low /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } /// Computes a scalar negated multiply-subtract of the double-precision /// values in the low 64 bits of 128-bit vectors of [2 x double]. /// \code /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = __A[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213SD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand in the low /// 64 bits. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier in the low /// 64 bits. /// \param __C /// A 128-bit vector of [2 x double] containing the subtrahend in the low /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [4 x float]. 
/// \code /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend/subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [2 x double]. /// \code /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend/subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [4 x float]. 
/// \code /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] /// result[127:96 = (__A[127:96] * __B[127:96]) - __C[127:96] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. /// /// \param __A /// A 128-bit vector of [4 x float] containing the multiplicand. /// \param __B /// A 128-bit vector of [4 x float] containing the multiplier. /// \param __C /// A 128-bit vector of [4 x float] containing the addend/subtrahend. /// \returns A 128-bit vector of [4 x float] containing the result. static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } /// Computes a multiply with alternating add/subtract of 128-bit vectors of /// [2 x double]. /// \code /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. /// /// \param __A /// A 128-bit vector of [2 x double] containing the multiplicand. /// \param __B /// A 128-bit vector of [2 x double] containing the multiplier. /// \param __C /// A 128-bit vector of [2 x double] containing the addend/subtrahend. /// \returns A 128-bit vector of [2 x double] containing the result. static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } /// Computes a multiply-add of 256-bit vectors of [8 x float]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. 
/// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } /// Computes a multiply-add of 256-bit vectors of [4 x double]. /// For each element, computes (__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADD213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. /// For each element, computes (__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. /// For each element, computes (__A * __B) - __C . 
/// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUB213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. /// For each element, computes -(__A * __B) + __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMADD213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend. /// \returns A 256-bit vector of [4 x double] containing the result. 
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. /// For each element, computes -(__A * __B) - __C . /// /// \headerfile /// /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of /// [8 x float]. 
/// \code /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] /// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128] /// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160] /// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192] /// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend/subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } /// Computes a multiply with alternating add/subtract of 256-bit vectors of /// [4 x double]. /// \code /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128] /// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. /// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend/subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. 
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } /// Computes a vector multiply with alternating add/subtract of 256-bit /// vectors of [8 x float]. /// \code /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96] /// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128] /// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160] /// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192] /// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. /// /// \param __A /// A 256-bit vector of [8 x float] containing the multiplicand. /// \param __B /// A 256-bit vector of [8 x float] containing the multiplier. /// \param __C /// A 256-bit vector of [8 x float] containing the addend/subtrahend. /// \returns A 256-bit vector of [8 x float] containing the result. static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } /// Computes a vector multiply with alternating add/subtract of 256-bit /// vectors of [4 x double]. /// \code /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] /// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128] /// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192] /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c VFMSUBADD213PD instruction. /// /// \param __A /// A 256-bit vector of [4 x double] containing the multiplicand. 
/// \param __B /// A 256-bit vector of [4 x double] containing the multiplier. /// \param __C /// A 256-bit vector of [4 x double] containing the addend/subtrahend. /// \returns A 256-bit vector of [4 x double] containing the result. static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif /* __FMAINTRIN_H */ /*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __FXSRINTRIN_H #define __FXSRINTRIN_H #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fxsr"))) /// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte /// memory region pointed to by the input parameter \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the FXSAVE instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxsave(void *__p) { __builtin_ia32_fxsave(__p); } /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte /// memory region pointed to by the input parameter \a __p. The contents of /// this memory region should have been written to by a previous \c _fxsave /// or \c _fxsave64 intrinsic. /// /// \headerfile /// /// This intrinsic corresponds to the FXRSTOR instruction. /// /// \param __p /// A pointer to a 512-byte memory region. 
The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxrstor(void *__p) { __builtin_ia32_fxrstor(__p); } #ifdef __x86_64__ /// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte /// memory region pointed to by the input parameter \a __p. /// /// \headerfile /// /// This intrinsic corresponds to the FXSAVE64 instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxsave64(void *__p) { __builtin_ia32_fxsave64(__p); } /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte /// memory region pointed to by the input parameter \a __p. The contents of /// this memory region should have been written to by a previous \c _fxsave /// or \c _fxsave64 intrinsic. /// /// \headerfile /// /// This intrinsic corresponds to the FXRSTOR64 instruction. /// /// \param __p /// A pointer to a 512-byte memory region. The beginning of this memory /// region should be aligned on a 16-byte boundary. static __inline__ void __DEFAULT_FN_ATTRS _fxrstor64(void *__p) { __builtin_ia32_fxrstor64(__p); } #endif #undef __DEFAULT_FN_ATTRS #endif /*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __GFNIINTRIN_H #define __GFNIINTRIN_H /* Default attributes for simple form (no masking). */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) /* Default attributes for YMM unmasked form. 
*/ #define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) /* Default attributes for ZMM unmasked forms. */ #define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), __min_vector_width__(512))) /* Default attributes for ZMM masked forms. */ #define __DEFAULT_FN_ATTRS_Z_MASK __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) /* Default attributes for VLX masked forms. */ #define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ (char)(I))) #define _mm_gf2p8affine_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ (char)(I))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, (__v16qi) __B); } #ifdef __AVXINTRIN_H #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ (char)(I))) #define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ (char)(I))) static __inline__ __m256i __DEFAULT_FN_ATTRS_Y _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, (__v32qi) __B); } #endif /* __AVXINTRIN_H */ #ifdef __AVX512BWINTRIN_H #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \ 
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ (char)(I))) #define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ (__v64qi)(__m512i)(S))) #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ _mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I) #define _mm512_gf2p8affine_epi64_epi8(A, B, I) \ ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ (char)(I))) #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_gf2p8affine_epi64_epi8((A), (B), (I)), \ (__v64qi)(__m512i)(S))) #define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ _mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I) static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_selectb_512(__U, (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), (__v64qi) __S); } static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(), __U, __A, __B); } #endif /* __AVX512BWINTRIN_H */ #ifdef __AVX512VLBWINTRIN_H #define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ (__v16qi)(__m128i)(S))) #define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ _mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ U, A, B, I) #define 
_mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ (__v32qi)(__m256i)(S))) #define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ _mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ U, A, B, I) #define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ (__v16qi)(__m128i)(S))) #define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ _mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), U, A, B, I) #define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ (__v32qi)(__m256i)(S))) #define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ _mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ U, A, B, I) static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128(__U, (__v16qi) _mm_gf2p8mul_epi8(__A, __B), (__v16qi) __S); } static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), __U, __A, __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256(__U, (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), (__v32qi) __S); } static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), __U, __A, __B); } #endif /* __AVX512VLBWINTRIN_H */ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_Y #undef __DEFAULT_FN_ATTRS_Z #undef 
__DEFAULT_FN_ATTRS_VL128 #undef __DEFAULT_FN_ATTRS_VL256 #endif /* __GFNIINTRIN_H */ /builtins/hexagon_circ_brev_intrinsics.h//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _HEXAGON_CIRC_BREV_INTRINSICS_H_ #define _HEXAGON_CIRC_BREV_INTRINSICS_H_ 1 #include #include /* Circular Load */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_D(Word64 dst, Word64 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_D(dest,ptr,incr,bufsize,K) \ { ptr = (int64_t *) HEXAGON_circ_ldd (ptr, &(dest), ((((K)+1)<<24)|((bufsize)<<3)), ((incr)*8)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_W(Word32 dst, Word32 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_W(dest,ptr,incr,bufsize,K) \ { ptr = (int *) HEXAGON_circ_ldw (ptr, &(dest), (((K)<<24)|((bufsize)<<2)), ((incr)*4)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_H(Word16 dst, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 
========================================================================== */ #define Q6_circ_load_update_H(dest,ptr,incr,bufsize,K) \ { ptr = (int16_t *) HEXAGON_circ_ldh (ptr, &(dest), ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_UH( UWord16 dst, UWord16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_UH(dest,ptr,incr,bufsize,K) \ { ptr = (uint16_t *) HEXAGON_circ_lduh (ptr, &(dest), ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_B(Word8 dst, Word8 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_B(dest,ptr,incr,bufsize,K) \ { ptr = (int8_t *) HEXAGON_circ_ldb (ptr, &(dest), ((((K)-2)<<24)|(bufsize)), incr); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_load_update_UB(UWord8 dst, UWord8 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_load_update_UB(dest,ptr,incr,bufsize,K) \ { ptr = (uint8_t *) HEXAGON_circ_ldub (ptr, &(dest), ((((K)-2)<<24)|(bufsize)), incr); } /* Circular Store */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_D(Word64 
*src, Word64 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_D(src,ptr,incr,bufsize,K) \ { ptr = (int64_t *) HEXAGON_circ_std (ptr, src, ((((K)+1)<<24)|((bufsize)<<3)), ((incr)*8)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_W(Word32 *src, Word32 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_W(src,ptr,incr,bufsize,K) \ { ptr = (int *) HEXAGON_circ_stw (ptr, src, (((K)<<24)|((bufsize)<<2)), ((incr)*4)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_HL(Word16 *src, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_HL(src,ptr,incr,bufsize,K) \ { ptr = (int16_t *) HEXAGON_circ_sth (ptr, src, ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_HH(Word16 *src, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_HH(src,ptr,incr,bufsize,K) \ { ptr = (int16_t *) HEXAGON_circ_sthhi (ptr, src, ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } /* 
========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_circ_store_update_B(Word8 *src, Word8 *ptr, UWord32 I4, UWord32 bufsize, UWord64 K) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_circ_store_update_B(src,ptr,incr,bufsize,K) \ { ptr = (int8_t *) HEXAGON_circ_stb (ptr, src, ((((K)-2)<<24)|(bufsize)), incr); } /* Bit Reverse Load */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_D(Word64 dst, Word64 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_D(dest,ptr,log2bufsize) \ { ptr = (int64_t *) HEXAGON_brev_ldd (ptr, &(dest), (1<<(16-((log2bufsize) + 3)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_W(Word32 dst, Word32 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_W(dest,ptr,log2bufsize) \ { ptr = (int *) HEXAGON_brev_ldw (ptr, &(dest), (1<<(16-((log2bufsize) + 2)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_H(Word16 dst, Word16 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_H(dest,ptr,log2bufsize) \ { ptr = (int16_t *) HEXAGON_brev_ldh (ptr, &(dest), (1<<(16-((log2bufsize) + 1)))); } /* 
========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_UH(UWord16 dst, UWord16 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_UH(dest,ptr,log2bufsize) \ { ptr = (uint16_t *) HEXAGON_brev_lduh (ptr, &(dest), (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_B(Word8 dst, Word8 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_B(dest,ptr,log2bufsize) \ { ptr = (int8_t *) HEXAGON_brev_ldb (ptr, &(dest), (1<<(16-((log2bufsize))))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_load_update_UB(UWord8 dst, UWord8 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_load_update_UB(dest,ptr,log2bufsize) \ { ptr = (uint8_t *) HEXAGON_brev_ldub (ptr, &(dest), (1<<(16-((log2bufsize))))); } /* Bit Reverse Store */ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_D(Word64 *src, Word64 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_D(src,ptr,log2bufsize) \ { ptr = (int64_t *) HEXAGON_brev_std (ptr, src, (1<<(16-((log2bufsize) + 3)))); } /* 
========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_W(Word32 *src, Word32 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_W(src,ptr,log2bufsize) \ { ptr = (int *) HEXAGON_brev_stw (ptr, src, (1<<(16-((log2bufsize) + 2)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_HL(Word16 *src, Word16 *ptr, Word32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_HL(src,ptr,log2bufsize) \ { ptr = (int16_t *) HEXAGON_brev_sth (ptr, src, (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_HH(Word16 *src, Word16 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_HH(src,ptr,log2bufsize) \ { ptr = (int16_t *) HEXAGON_brev_sthhi (ptr, src, (1<<(16-((log2bufsize) + 1)))); } /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: void Q6_bitrev_store_update_B(Word8 *src, Word8 *ptr, UWord32 Iu4) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #define Q6_bitrev_store_update_B(src,ptr,log2bufsize) \ { ptr = (int8_t *) HEXAGON_brev_stb (ptr, src, (1<<(16-((log2bufsize))))); } #define HEXAGON_circ_ldd __builtin_circ_ldd #define HEXAGON_circ_ldw 
__builtin_circ_ldw #define HEXAGON_circ_ldh __builtin_circ_ldh #define HEXAGON_circ_lduh __builtin_circ_lduh #define HEXAGON_circ_ldb __builtin_circ_ldb #define HEXAGON_circ_ldub __builtin_circ_ldub #define HEXAGON_circ_std __builtin_circ_std #define HEXAGON_circ_stw __builtin_circ_stw #define HEXAGON_circ_sth __builtin_circ_sth #define HEXAGON_circ_sthhi __builtin_circ_sthhi #define HEXAGON_circ_stb __builtin_circ_stb #define HEXAGON_brev_ldd __builtin_brev_ldd #define HEXAGON_brev_ldw __builtin_brev_ldw #define HEXAGON_brev_ldh __builtin_brev_ldh #define HEXAGON_brev_lduh __builtin_brev_lduh #define HEXAGON_brev_ldb __builtin_brev_ldb #define HEXAGON_brev_ldub __builtin_brev_ldub #define HEXAGON_brev_std __builtin_brev_std #define HEXAGON_brev_stw __builtin_brev_stw #define HEXAGON_brev_sth __builtin_brev_sth #define HEXAGON_brev_sthhi __builtin_brev_sthhi #define HEXAGON_brev_stb __builtin_brev_stb #ifdef __HVX__ /* ========================================================================== Assembly Syntax: if (Qt) vmem(Rt+#0) = Vs C Intrinsic Prototype: void Q6_vmaskedstoreq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstoreq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstoreq) /* ========================================================================== Assembly Syntax: if (!Qt) vmem(Rt+#0) = Vs C Intrinsic Prototype: void Q6_vmaskedstorenq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstorenq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorenq) /* ========================================================================== Assembly Syntax: if (Qt) vmem(Rt+#0):nt = Vs C Intrinsic Prototype: void 
Q6_vmaskedstorentq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstorentq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorentq) /* ========================================================================== Assembly Syntax: if (!Qt) vmem(Rt+#0):nt = Vs C Intrinsic Prototype: void Q6_vmaskedstorentnq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) Instruction Type: COPROC_VMEM Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmaskedstorentnq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorentnq) #endif #endif /* #ifndef _HEXAGON_CIRC_BREV_INTRINSICS_H_ */ #ifdef __NOT_DEFINED__ /*** comment block template ***/ /* ========================================================================== Assembly Syntax: Return=instruction() C Intrinsic Prototype: ReturnType Intrinsic(ParamType Rs, ParamType Rt) Instruction Type: InstructionType Execution Slots: SLOT0123 ========================================================================== */ #endif /*** __NOT_DEFINED__ ***/ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Automatically generated file, do not edit! 
//===----------------------------------------------------------------------===// #ifndef __HEXAGON_PROTOS_H_ #define __HEXAGON_PROTOS_H_ 1 /* ========================================================================== Assembly Syntax: Rd32=abs(Rs32) C Intrinsic Prototype: Word32 Q6_R_abs_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_abs_R __builtin_HEXAGON_A2_abs /* ========================================================================== Assembly Syntax: Rdd32=abs(Rss32) C Intrinsic Prototype: Word64 Q6_P_abs_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_abs_P __builtin_HEXAGON_A2_absp /* ========================================================================== Assembly Syntax: Rd32=abs(Rs32):sat C Intrinsic Prototype: Word32 Q6_R_abs_R_sat(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_abs_R_sat __builtin_HEXAGON_A2_abssat /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_add_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_add_RR __builtin_HEXAGON_A2_add /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRh_s16 __builtin_HEXAGON_A2_addh_h16_hh /* ========================================================================== Assembly Syntax: 
Rd32=add(Rt32.h,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRl_s16 __builtin_HEXAGON_A2_addh_h16_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh_s16 __builtin_HEXAGON_A2_addh_h16_lh /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl_s16 __builtin_HEXAGON_A2_addh_h16_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hh /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.h,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RhRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RhRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 
Q6_R_add_RlRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_lh /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_add_RlRh(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh __builtin_HEXAGON_A2_addh_l16_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_add_RlRl(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRl __builtin_HEXAGON_A2_addh_l16_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):sat C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_RlRh_sat __builtin_HEXAGON_A2_addh_l16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):sat C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_add_RlRl_sat __builtin_HEXAGON_A2_addh_l16_sat_ll /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,#s16) C Intrinsic Prototype: Word32 Q6_R_add_RI(Word32 Rs, Word32 Is16) Instruction Type: ALU32_ADDI Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_add_RI __builtin_HEXAGON_A2_addi /* ========================================================================== Assembly Syntax: Rdd32=add(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_add_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_add_PP __builtin_HEXAGON_A2_addp /* ========================================================================== Assembly Syntax: Rdd32=add(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_add_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_add_PP_sat __builtin_HEXAGON_A2_addpsat /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_add_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_add_RR_sat __builtin_HEXAGON_A2_addsat /* ========================================================================== Assembly Syntax: Rdd32=add(Rs32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_add_RP(Word32 Rs, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_add_RP __builtin_HEXAGON_A2_addsp /* 
========================================================================== Assembly Syntax: Rd32=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_and_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_and_RR __builtin_HEXAGON_A2_and /* ========================================================================== Assembly Syntax: Rd32=and(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_and_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_and_RI __builtin_HEXAGON_A2_andir /* ========================================================================== Assembly Syntax: Rdd32=and(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_and_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_and_PP __builtin_HEXAGON_A2_andp /* ========================================================================== Assembly Syntax: Rd32=aslh(Rs32) C Intrinsic Prototype: Word32 Q6_R_aslh_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_aslh_R __builtin_HEXAGON_A2_aslh /* ========================================================================== Assembly Syntax: Rd32=asrh(Rs32) C Intrinsic Prototype: Word32 Q6_R_asrh_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_asrh_R __builtin_HEXAGON_A2_asrh /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.h,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_combine_RhRh(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_R_combine_RhRh __builtin_HEXAGON_A2_combine_hh /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.h,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_combine_RhRl(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RhRl __builtin_HEXAGON_A2_combine_hl /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.l,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_combine_RlRh(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RlRh __builtin_HEXAGON_A2_combine_lh /* ========================================================================== Assembly Syntax: Rd32=combine(Rt32.l,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_combine_RlRl(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_combine_RlRl __builtin_HEXAGON_A2_combine_ll /* ========================================================================== Assembly Syntax: Rdd32=combine(#s8,#S8) C Intrinsic Prototype: Word64 Q6_P_combine_II(Word32 Is8, Word32 IS8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_II __builtin_HEXAGON_A2_combineii /* ========================================================================== Assembly Syntax: Rdd32=combine(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_combine_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define 
Q6_P_combine_RR __builtin_HEXAGON_A2_combinew /* ========================================================================== Assembly Syntax: Rd32=max(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_max_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_max_RR __builtin_HEXAGON_A2_max /* ========================================================================== Assembly Syntax: Rdd32=max(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_max_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_max_PP __builtin_HEXAGON_A2_maxp /* ========================================================================== Assembly Syntax: Rd32=maxu(Rs32,Rt32) C Intrinsic Prototype: UWord32 Q6_R_maxu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_maxu_RR __builtin_HEXAGON_A2_maxu /* ========================================================================== Assembly Syntax: Rdd32=maxu(Rss32,Rtt32) C Intrinsic Prototype: UWord64 Q6_P_maxu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_maxu_PP __builtin_HEXAGON_A2_maxup /* ========================================================================== Assembly Syntax: Rd32=min(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_min_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_min_RR __builtin_HEXAGON_A2_min /* ========================================================================== Assembly Syntax: Rdd32=min(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_min_PP(Word64 Rtt, Word64 
Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_min_PP __builtin_HEXAGON_A2_minp /* ========================================================================== Assembly Syntax: Rd32=minu(Rt32,Rs32) C Intrinsic Prototype: UWord32 Q6_R_minu_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_minu_RR __builtin_HEXAGON_A2_minu /* ========================================================================== Assembly Syntax: Rdd32=minu(Rtt32,Rss32) C Intrinsic Prototype: UWord64 Q6_P_minu_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_minu_PP __builtin_HEXAGON_A2_minup /* ========================================================================== Assembly Syntax: Rd32=neg(Rs32) C Intrinsic Prototype: Word32 Q6_R_neg_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_neg_R __builtin_HEXAGON_A2_neg /* ========================================================================== Assembly Syntax: Rdd32=neg(Rss32) C Intrinsic Prototype: Word64 Q6_P_neg_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_neg_P __builtin_HEXAGON_A2_negp /* ========================================================================== Assembly Syntax: Rd32=neg(Rs32):sat C Intrinsic Prototype: Word32 Q6_R_neg_R_sat(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_neg_R_sat __builtin_HEXAGON_A2_negsat /* ========================================================================== 
Assembly Syntax: Rd32=not(Rs32) C Intrinsic Prototype: Word32 Q6_R_not_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_not_R __builtin_HEXAGON_A2_not /* ========================================================================== Assembly Syntax: Rdd32=not(Rss32) C Intrinsic Prototype: Word64 Q6_P_not_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_not_P __builtin_HEXAGON_A2_notp /* ========================================================================== Assembly Syntax: Rd32=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_or_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_or_RR __builtin_HEXAGON_A2_or /* ========================================================================== Assembly Syntax: Rd32=or(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_or_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_or_RI __builtin_HEXAGON_A2_orir /* ========================================================================== Assembly Syntax: Rdd32=or(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_or_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_or_PP __builtin_HEXAGON_A2_orp /* ========================================================================== Assembly Syntax: Rd32=round(Rss32):sat C Intrinsic Prototype: Word32 Q6_R_round_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_P_sat 
__builtin_HEXAGON_A2_roundsat /* ========================================================================== Assembly Syntax: Rd32=sat(Rss32) C Intrinsic Prototype: Word32 Q6_R_sat_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sat_P __builtin_HEXAGON_A2_sat /* ========================================================================== Assembly Syntax: Rd32=satb(Rs32) C Intrinsic Prototype: Word32 Q6_R_satb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_satb_R __builtin_HEXAGON_A2_satb /* ========================================================================== Assembly Syntax: Rd32=sath(Rs32) C Intrinsic Prototype: Word32 Q6_R_sath_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sath_R __builtin_HEXAGON_A2_sath /* ========================================================================== Assembly Syntax: Rd32=satub(Rs32) C Intrinsic Prototype: Word32 Q6_R_satub_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_satub_R __builtin_HEXAGON_A2_satub /* ========================================================================== Assembly Syntax: Rd32=satuh(Rs32) C Intrinsic Prototype: Word32 Q6_R_satuh_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_satuh_R __builtin_HEXAGON_A2_satuh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_sub_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_R_sub_RR __builtin_HEXAGON_A2_sub /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRh_s16 __builtin_HEXAGON_A2_subh_h16_hh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRl_s16 __builtin_HEXAGON_A2_subh_h16_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh_s16 __builtin_HEXAGON_A2_subh_h16_lh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl_s16 __builtin_HEXAGON_A2_subh_h16_ll /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRh_sat_s16 
__builtin_HEXAGON_A2_subh_h16_sat_hh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.h,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RhRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_lh /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):sat:<<16 C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat_s16(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_ll /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h) C Intrinsic Prototype: Word32 Q6_R_sub_RlRh(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh __builtin_HEXAGON_A2_subh_l16_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l) C Intrinsic Prototype: Word32 Q6_R_sub_RlRl(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl __builtin_HEXAGON_A2_subh_l16_ll /* 
========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):sat C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRh_sat __builtin_HEXAGON_A2_subh_l16_sat_hl /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):sat C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_RlRl_sat __builtin_HEXAGON_A2_subh_l16_sat_ll /* ========================================================================== Assembly Syntax: Rdd32=sub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_sub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_sub_PP __builtin_HEXAGON_A2_subp /* ========================================================================== Assembly Syntax: Rd32=sub(#s10,Rs32) C Intrinsic Prototype: Word32 Q6_R_sub_IR(Word32 Is10, Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sub_IR __builtin_HEXAGON_A2_subri /* ========================================================================== Assembly Syntax: Rd32=sub(Rt32,Rs32):sat C Intrinsic Prototype: Word32 Q6_R_sub_RR_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sub_RR_sat __builtin_HEXAGON_A2_subsat /* ========================================================================== Assembly Syntax: Rd32=vaddh(Rs32,Rt32) C Intrinsic Prototype: Word32 
Q6_R_vaddh_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vaddh_RR __builtin_HEXAGON_A2_svaddh /* ========================================================================== Assembly Syntax: Rd32=vaddh(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_vaddh_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vaddh_RR_sat __builtin_HEXAGON_A2_svaddhs /* ========================================================================== Assembly Syntax: Rd32=vadduh(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_vadduh_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vadduh_RR_sat __builtin_HEXAGON_A2_svadduhs /* ========================================================================== Assembly Syntax: Rd32=vavgh(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_vavgh_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vavgh_RR __builtin_HEXAGON_A2_svavgh /* ========================================================================== Assembly Syntax: Rd32=vavgh(Rs32,Rt32):rnd C Intrinsic Prototype: Word32 Q6_R_vavgh_RR_rnd(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vavgh_RR_rnd __builtin_HEXAGON_A2_svavghs /* ========================================================================== Assembly Syntax: Rd32=vnavgh(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vnavgh_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_R_vnavgh_RR __builtin_HEXAGON_A2_svnavgh /* ========================================================================== Assembly Syntax: Rd32=vsubh(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vsubh_RR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vsubh_RR __builtin_HEXAGON_A2_svsubh /* ========================================================================== Assembly Syntax: Rd32=vsubh(Rt32,Rs32):sat C Intrinsic Prototype: Word32 Q6_R_vsubh_RR_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vsubh_RR_sat __builtin_HEXAGON_A2_svsubhs /* ========================================================================== Assembly Syntax: Rd32=vsubuh(Rt32,Rs32):sat C Intrinsic Prototype: Word32 Q6_R_vsubuh_RR_sat(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vsubuh_RR_sat __builtin_HEXAGON_A2_svsubuhs /* ========================================================================== Assembly Syntax: Rd32=swiz(Rs32) C Intrinsic Prototype: Word32 Q6_R_swiz_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_swiz_R __builtin_HEXAGON_A2_swiz /* ========================================================================== Assembly Syntax: Rd32=sxtb(Rs32) C Intrinsic Prototype: Word32 Q6_R_sxtb_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sxtb_R __builtin_HEXAGON_A2_sxtb /* 
========================================================================== Assembly Syntax: Rd32=sxth(Rs32) C Intrinsic Prototype: Word32 Q6_R_sxth_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_sxth_R __builtin_HEXAGON_A2_sxth /* ========================================================================== Assembly Syntax: Rdd32=sxtw(Rs32) C Intrinsic Prototype: Word64 Q6_P_sxtw_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_sxtw_R __builtin_HEXAGON_A2_sxtw /* ========================================================================== Assembly Syntax: Rd32=Rs32 C Intrinsic Prototype: Word32 Q6_R_equals_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_equals_R __builtin_HEXAGON_A2_tfr /* ========================================================================== Assembly Syntax: Rx32.h=#u16 C Intrinsic Prototype: Word32 Q6_Rh_equals_I(Word32 Rx, Word32 Iu16) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Rh_equals_I __builtin_HEXAGON_A2_tfrih /* ========================================================================== Assembly Syntax: Rx32.l=#u16 C Intrinsic Prototype: Word32 Q6_Rl_equals_I(Word32 Rx, Word32 Iu16) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Rl_equals_I __builtin_HEXAGON_A2_tfril /* ========================================================================== Assembly Syntax: Rdd32=Rss32 C Intrinsic Prototype: Word64 Q6_P_equals_P(Word64 Rss) Instruction Type: ALU32_2op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_P_equals_P __builtin_HEXAGON_A2_tfrp /* ========================================================================== Assembly Syntax: Rdd32=#s8 C Intrinsic Prototype: Word64 Q6_P_equals_I(Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_equals_I __builtin_HEXAGON_A2_tfrpi /* ========================================================================== Assembly Syntax: Rd32=#s16 C Intrinsic Prototype: Word32 Q6_R_equals_I(Word32 Is16) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_equals_I __builtin_HEXAGON_A2_tfrsi /* ========================================================================== Assembly Syntax: Rdd32=vabsh(Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsh_P __builtin_HEXAGON_A2_vabsh /* ========================================================================== Assembly Syntax: Rdd32=vabsh(Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vabsh_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsh_P_sat __builtin_HEXAGON_A2_vabshsat /* ========================================================================== Assembly Syntax: Rdd32=vabsw(Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsw_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsw_P __builtin_HEXAGON_A2_vabsw /* ========================================================================== Assembly Syntax: Rdd32=vabsw(Rss32):sat C Intrinsic Prototype: Word64 
Q6_P_vabsw_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsw_P_sat __builtin_HEXAGON_A2_vabswsat /* ========================================================================== Assembly Syntax: Rdd32=vaddb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddb_PP(Word64 Rss, Word64 Rtt) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vaddb_PP __builtin_HEXAGON_A2_vaddb_map /* ========================================================================== Assembly Syntax: Rdd32=vaddh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddh_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddh_PP __builtin_HEXAGON_A2_vaddh /* ========================================================================== Assembly Syntax: Rdd32=vaddh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vaddh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddh_PP_sat __builtin_HEXAGON_A2_vaddhs /* ========================================================================== Assembly Syntax: Rdd32=vaddub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddub_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddub_PP __builtin_HEXAGON_A2_vaddub /* ========================================================================== Assembly Syntax: Rdd32=vaddub(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vaddub_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vaddub_PP_sat __builtin_HEXAGON_A2_vaddubs /* ========================================================================== Assembly Syntax: Rdd32=vadduh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vadduh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vadduh_PP_sat __builtin_HEXAGON_A2_vadduhs /* ========================================================================== Assembly Syntax: Rdd32=vaddw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vaddw_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddw_PP __builtin_HEXAGON_A2_vaddw /* ========================================================================== Assembly Syntax: Rdd32=vaddw(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vaddw_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaddw_PP_sat __builtin_HEXAGON_A2_vaddws /* ========================================================================== Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavgh_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgh_PP __builtin_HEXAGON_A2_vavgh /* ========================================================================== Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32):crnd C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_crnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgh_PP_crnd __builtin_HEXAGON_A2_vavghcr /* 
========================================================================== Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgh_PP_rnd __builtin_HEXAGON_A2_vavghr /* ========================================================================== Assembly Syntax: Rdd32=vavgub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavgub_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgub_PP __builtin_HEXAGON_A2_vavgub /* ========================================================================== Assembly Syntax: Rdd32=vavgub(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavgub_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgub_PP_rnd __builtin_HEXAGON_A2_vavgubr /* ========================================================================== Assembly Syntax: Rdd32=vavguh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavguh_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguh_PP __builtin_HEXAGON_A2_vavguh /* ========================================================================== Assembly Syntax: Rdd32=vavguh(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavguh_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguh_PP_rnd __builtin_HEXAGON_A2_vavguhr /* ========================================================================== Assembly Syntax: Rdd32=vavguw(Rss32,Rtt32) C Intrinsic 
Prototype: Word64 Q6_P_vavguw_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguw_PP __builtin_HEXAGON_A2_vavguw /* ========================================================================== Assembly Syntax: Rdd32=vavguw(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavguw_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavguw_PP_rnd __builtin_HEXAGON_A2_vavguwr /* ========================================================================== Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vavgw_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgw_PP __builtin_HEXAGON_A2_vavgw /* ========================================================================== Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32):crnd C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_crnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgw_PP_crnd __builtin_HEXAGON_A2_vavgwcr /* ========================================================================== Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32):rnd C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_rnd(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vavgw_PP_rnd __builtin_HEXAGON_A2_vavgwr /* ========================================================================== Assembly Syntax: Pd4=vcmpb.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_p_vcmpb_eq_PP __builtin_HEXAGON_A2_vcmpbeq /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gtu_PP __builtin_HEXAGON_A2_vcmpbgtu /* ========================================================================== Assembly Syntax: Pd4=vcmph.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_eq_PP __builtin_HEXAGON_A2_vcmpheq /* ========================================================================== Assembly Syntax: Pd4=vcmph.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gt_PP __builtin_HEXAGON_A2_vcmphgt /* ========================================================================== Assembly Syntax: Pd4=vcmph.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gtu_PP __builtin_HEXAGON_A2_vcmphgtu /* ========================================================================== Assembly Syntax: Pd4=vcmpw.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_eq_PP __builtin_HEXAGON_A2_vcmpweq /* 
========================================================================== Assembly Syntax: Pd4=vcmpw.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gt_PP __builtin_HEXAGON_A2_vcmpwgt /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gtu_PP __builtin_HEXAGON_A2_vcmpwgtu /* ========================================================================== Assembly Syntax: Rdd32=vconj(Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vconj_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vconj_P_sat __builtin_HEXAGON_A2_vconj /* ========================================================================== Assembly Syntax: Rdd32=vmaxb(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxb_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxb_PP __builtin_HEXAGON_A2_vmaxb /* ========================================================================== Assembly Syntax: Rdd32=vmaxh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxh_PP __builtin_HEXAGON_A2_vmaxh /* ========================================================================== Assembly Syntax: Rdd32=vmaxub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxub_PP(Word64 Rtt, 
Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxub_PP __builtin_HEXAGON_A2_vmaxub /* ========================================================================== Assembly Syntax: Rdd32=vmaxuh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxuh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxuh_PP __builtin_HEXAGON_A2_vmaxuh /* ========================================================================== Assembly Syntax: Rdd32=vmaxuw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxuw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxuw_PP __builtin_HEXAGON_A2_vmaxuw /* ========================================================================== Assembly Syntax: Rdd32=vmaxw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vmaxw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmaxw_PP __builtin_HEXAGON_A2_vmaxw /* ========================================================================== Assembly Syntax: Rdd32=vminb(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminb_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminb_PP __builtin_HEXAGON_A2_vminb /* ========================================================================== Assembly Syntax: Rdd32=vminh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminh_PP 
__builtin_HEXAGON_A2_vminh /* ========================================================================== Assembly Syntax: Rdd32=vminub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminub_PP __builtin_HEXAGON_A2_vminub /* ========================================================================== Assembly Syntax: Rdd32=vminuh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminuh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminuh_PP __builtin_HEXAGON_A2_vminuh /* ========================================================================== Assembly Syntax: Rdd32=vminuw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminuw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminuw_PP __builtin_HEXAGON_A2_vminuw /* ========================================================================== Assembly Syntax: Rdd32=vminw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vminw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vminw_PP __builtin_HEXAGON_A2_vminw /* ========================================================================== Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgh_PP __builtin_HEXAGON_A2_vnavgh /* ========================================================================== Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32):crnd:sat C Intrinsic 
Prototype: Word64 Q6_P_vnavgh_PP_crnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgh_PP_crnd_sat __builtin_HEXAGON_A2_vnavghcr /* ========================================================================== Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_rnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgh_PP_rnd_sat __builtin_HEXAGON_A2_vnavghr /* ========================================================================== Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgw_PP __builtin_HEXAGON_A2_vnavgw /* ========================================================================== Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32):crnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_crnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgw_PP_crnd_sat __builtin_HEXAGON_A2_vnavgwcr /* ========================================================================== Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_rnd_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vnavgw_PP_rnd_sat __builtin_HEXAGON_A2_vnavgwr /* ========================================================================== Assembly Syntax: Rdd32=vraddub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vraddub_PP(Word64 Rss, Word64 Rtt) 
Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vraddub_PP __builtin_HEXAGON_A2_vraddub /* ========================================================================== Assembly Syntax: Rxx32+=vraddub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vraddubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vraddubacc_PP __builtin_HEXAGON_A2_vraddub_acc /* ========================================================================== Assembly Syntax: Rdd32=vrsadub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrsadub_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrsadub_PP __builtin_HEXAGON_A2_vrsadub /* ========================================================================== Assembly Syntax: Rxx32+=vrsadub(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrsadubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrsadubacc_PP __builtin_HEXAGON_A2_vrsadub_acc /* ========================================================================== Assembly Syntax: Rdd32=vsubb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vsubb_PP(Word64 Rss, Word64 Rtt) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vsubb_PP __builtin_HEXAGON_A2_vsubb_map /* ========================================================================== Assembly Syntax: Rdd32=vsubh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vsubh_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vsubh_PP __builtin_HEXAGON_A2_vsubh /* ========================================================================== Assembly Syntax: Rdd32=vsubh(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubh_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubh_PP_sat __builtin_HEXAGON_A2_vsubhs /* ========================================================================== Assembly Syntax: Rdd32=vsubub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vsubub_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubub_PP __builtin_HEXAGON_A2_vsubub /* ========================================================================== Assembly Syntax: Rdd32=vsubub(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubub_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubub_PP_sat __builtin_HEXAGON_A2_vsububs /* ========================================================================== Assembly Syntax: Rdd32=vsubuh(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubuh_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubuh_PP_sat __builtin_HEXAGON_A2_vsubuhs /* ========================================================================== Assembly Syntax: Rdd32=vsubw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vsubw_PP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubw_PP __builtin_HEXAGON_A2_vsubw /* 
========================================================================== Assembly Syntax: Rdd32=vsubw(Rtt32,Rss32):sat C Intrinsic Prototype: Word64 Q6_P_vsubw_PP_sat(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsubw_PP_sat __builtin_HEXAGON_A2_vsubws /* ========================================================================== Assembly Syntax: Rd32=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xor_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_xor_RR __builtin_HEXAGON_A2_xor /* ========================================================================== Assembly Syntax: Rdd32=xor(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_xor_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_xor_PP __builtin_HEXAGON_A2_xorp /* ========================================================================== Assembly Syntax: Rd32=zxtb(Rs32) C Intrinsic Prototype: Word32 Q6_R_zxtb_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_zxtb_R __builtin_HEXAGON_A2_zxtb /* ========================================================================== Assembly Syntax: Rd32=zxth(Rs32) C Intrinsic Prototype: Word32 Q6_R_zxth_R(Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_zxth_R __builtin_HEXAGON_A2_zxth /* ========================================================================== Assembly Syntax: Rd32=and(Rt32,~Rs32) C Intrinsic Prototype: Word32 Q6_R_and_RnR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: 
SLOT0123 ========================================================================== */ #define Q6_R_and_RnR __builtin_HEXAGON_A4_andn /* ========================================================================== Assembly Syntax: Rdd32=and(Rtt32,~Rss32) C Intrinsic Prototype: Word64 Q6_P_and_PnP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_and_PnP __builtin_HEXAGON_A4_andnp /* ========================================================================== Assembly Syntax: Rdd32=bitsplit(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_bitsplit_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_bitsplit_RR __builtin_HEXAGON_A4_bitsplit /* ========================================================================== Assembly Syntax: Rdd32=bitsplit(Rs32,#u5) C Intrinsic Prototype: Word64 Q6_P_bitsplit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_bitsplit_RI __builtin_HEXAGON_A4_bitspliti /* ========================================================================== Assembly Syntax: Pd4=boundscheck(Rs32,Rtt32) C Intrinsic Prototype: Byte Q6_p_boundscheck_RP(Word32 Rs, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_boundscheck_RP __builtin_HEXAGON_A4_boundscheck /* ========================================================================== Assembly Syntax: Pd4=cmpb.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_eq_RR __builtin_HEXAGON_A4_cmpbeq /* 
========================================================================== Assembly Syntax: Pd4=cmpb.eq(Rs32,#u8) C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RI(Word32 Rs, Word32 Iu8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_eq_RI __builtin_HEXAGON_A4_cmpbeqi /* ========================================================================== Assembly Syntax: Pd4=cmpb.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gt_RR __builtin_HEXAGON_A4_cmpbgt /* ========================================================================== Assembly Syntax: Pd4=cmpb.gt(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gt_RI __builtin_HEXAGON_A4_cmpbgti /* ========================================================================== Assembly Syntax: Pd4=cmpb.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gtu_RR __builtin_HEXAGON_A4_cmpbgtu /* ========================================================================== Assembly Syntax: Pd4=cmpb.gtu(Rs32,#u7) C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RI(Word32 Rs, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmpb_gtu_RI __builtin_HEXAGON_A4_cmpbgtui /* ========================================================================== Assembly Syntax: Pd4=cmph.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmph_eq_RR(Word32 Rs, Word32 
Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_eq_RR __builtin_HEXAGON_A4_cmpheq /* ========================================================================== Assembly Syntax: Pd4=cmph.eq(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmph_eq_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_eq_RI __builtin_HEXAGON_A4_cmpheqi /* ========================================================================== Assembly Syntax: Pd4=cmph.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmph_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gt_RR __builtin_HEXAGON_A4_cmphgt /* ========================================================================== Assembly Syntax: Pd4=cmph.gt(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmph_gt_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gt_RI __builtin_HEXAGON_A4_cmphgti /* ========================================================================== Assembly Syntax: Pd4=cmph.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gtu_RR __builtin_HEXAGON_A4_cmphgtu /* ========================================================================== Assembly Syntax: Pd4=cmph.gtu(Rs32,#u7) C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RI(Word32 Rs, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmph_gtu_RI 
__builtin_HEXAGON_A4_cmphgtui /* ========================================================================== Assembly Syntax: Rdd32=combine(#s8,Rs32) C Intrinsic Prototype: Word64 Q6_P_combine_IR(Word32 Is8, Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_IR __builtin_HEXAGON_A4_combineir /* ========================================================================== Assembly Syntax: Rdd32=combine(Rs32,#s8) C Intrinsic Prototype: Word64 Q6_P_combine_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_combine_RI __builtin_HEXAGON_A4_combineri /* ========================================================================== Assembly Syntax: Rd32=cround(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_cround_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cround_RI __builtin_HEXAGON_A4_cround_ri /* ========================================================================== Assembly Syntax: Rd32=cround(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_cround_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cround_RR __builtin_HEXAGON_A4_cround_rr /* ========================================================================== Assembly Syntax: Rd32=modwrap(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_modwrap_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_modwrap_RR __builtin_HEXAGON_A4_modwrapu /* ========================================================================== Assembly Syntax: Rd32=or(Rt32,~Rs32) C 
Intrinsic Prototype: Word32 Q6_R_or_RnR(Word32 Rt, Word32 Rs) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_or_RnR __builtin_HEXAGON_A4_orn /* ========================================================================== Assembly Syntax: Rdd32=or(Rtt32,~Rss32) C Intrinsic Prototype: Word64 Q6_P_or_PnP(Word64 Rtt, Word64 Rss) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_or_PnP __builtin_HEXAGON_A4_ornp /* ========================================================================== Assembly Syntax: Rd32=cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_cmp_eq_RR __builtin_HEXAGON_A4_rcmpeq /* ========================================================================== Assembly Syntax: Rd32=cmp.eq(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_cmp_eq_RI __builtin_HEXAGON_A4_rcmpeqi /* ========================================================================== Assembly Syntax: Rd32=!cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_not_cmp_eq_RR __builtin_HEXAGON_A4_rcmpneq /* ========================================================================== Assembly Syntax: Rd32=!cmp.eq(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_R_not_cmp_eq_RI __builtin_HEXAGON_A4_rcmpneqi /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_round_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RI __builtin_HEXAGON_A4_round_ri /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,#u5):sat C Intrinsic Prototype: Word32 Q6_R_round_RI_sat(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RI_sat __builtin_HEXAGON_A4_round_ri_sat /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_round_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RR __builtin_HEXAGON_A4_round_rr /* ========================================================================== Assembly Syntax: Rd32=round(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_round_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_round_RR_sat __builtin_HEXAGON_A4_round_rr_sat /* ========================================================================== Assembly Syntax: Pd4=tlbmatch(Rss32,Rt32) C Intrinsic Prototype: Byte Q6_p_tlbmatch_PR(Word64 Rss, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_tlbmatch_PR __builtin_HEXAGON_A4_tlbmatch /* 
========================================================================== Assembly Syntax: Pd4=any8(vcmpb.eq(Rss32,Rtt32)) C Intrinsic Prototype: Byte Q6_p_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_any8_vcmpb_eq_PP __builtin_HEXAGON_A4_vcmpbeq_any /* ========================================================================== Assembly Syntax: Pd4=vcmpb.eq(Rss32,#u8) C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PI(Word64 Rss, Word32 Iu8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_eq_PI __builtin_HEXAGON_A4_vcmpbeqi /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gt_PP __builtin_HEXAGON_A4_vcmpbgt /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gt(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gt_PI __builtin_HEXAGON_A4_vcmpbgti /* ========================================================================== Assembly Syntax: Pd4=vcmpb.gtu(Rss32,#u7) C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PI(Word64 Rss, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpb_gtu_PI __builtin_HEXAGON_A4_vcmpbgtui /* ========================================================================== Assembly Syntax: Pd4=vcmph.eq(Rss32,#s8) C Intrinsic 
Prototype: Byte Q6_p_vcmph_eq_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_eq_PI __builtin_HEXAGON_A4_vcmpheqi /* ========================================================================== Assembly Syntax: Pd4=vcmph.gt(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gt_PI __builtin_HEXAGON_A4_vcmphgti /* ========================================================================== Assembly Syntax: Pd4=vcmph.gtu(Rss32,#u7) C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PI(Word64 Rss, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmph_gtu_PI __builtin_HEXAGON_A4_vcmphgtui /* ========================================================================== Assembly Syntax: Pd4=vcmpw.eq(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_eq_PI __builtin_HEXAGON_A4_vcmpweqi /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gt(Rss32,#s8) C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PI(Word64 Rss, Word32 Is8) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_vcmpw_gt_PI __builtin_HEXAGON_A4_vcmpwgti /* ========================================================================== Assembly Syntax: Pd4=vcmpw.gtu(Rss32,#u7) C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PI(Word64 Rss, Word32 Iu7) Instruction Type: ALU64 Execution Slots: SLOT23 
========================================================================== */ #define Q6_p_vcmpw_gtu_PI __builtin_HEXAGON_A4_vcmpwgtui /* ========================================================================== Assembly Syntax: Rxx32=vrmaxh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxh_PR __builtin_HEXAGON_A4_vrmaxh /* ========================================================================== Assembly Syntax: Rxx32=vrmaxuh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxuh_PR __builtin_HEXAGON_A4_vrmaxuh /* ========================================================================== Assembly Syntax: Rxx32=vrmaxuw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxuw_PR __builtin_HEXAGON_A4_vrmaxuw /* ========================================================================== Assembly Syntax: Rxx32=vrmaxw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrmaxw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmaxw_PR __builtin_HEXAGON_A4_vrmaxw /* ========================================================================== Assembly Syntax: Rxx32=vrminh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminh_PR 
__builtin_HEXAGON_A4_vrminh /* ========================================================================== Assembly Syntax: Rxx32=vrminuh(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminuh_PR __builtin_HEXAGON_A4_vrminuh /* ========================================================================== Assembly Syntax: Rxx32=vrminuw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminuw_PR __builtin_HEXAGON_A4_vrminuw /* ========================================================================== Assembly Syntax: Rxx32=vrminw(Rss32,Ru32) C Intrinsic Prototype: Word64 Q6_P_vrminw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrminw_PR __builtin_HEXAGON_A4_vrminw /* ========================================================================== Assembly Syntax: Rd32=vaddhub(Rss32,Rtt32):sat C Intrinsic Prototype: Word32 Q6_R_vaddhub_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vaddhub_PP_sat __builtin_HEXAGON_A5_vaddhubs /* ========================================================================== Assembly Syntax: Pd4=all8(Ps4) C Intrinsic Prototype: Byte Q6_p_all8_p(Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_all8_p __builtin_HEXAGON_C2_all8 /* ========================================================================== Assembly Syntax: Pd4=and(Pt4,Ps4) C 
Intrinsic Prototype: Byte Q6_p_and_pp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_pp __builtin_HEXAGON_C2_and /* ========================================================================== Assembly Syntax: Pd4=and(Pt4,!Ps4) C Intrinsic Prototype: Byte Q6_p_and_pnp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_pnp __builtin_HEXAGON_C2_andn /* ========================================================================== Assembly Syntax: Pd4=any8(Ps4) C Intrinsic Prototype: Byte Q6_p_any8_p(Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_any8_p __builtin_HEXAGON_C2_any8 /* ========================================================================== Assembly Syntax: Pd4=bitsclr(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_bitsclr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_bitsclr_RR __builtin_HEXAGON_C2_bitsclr /* ========================================================================== Assembly Syntax: Pd4=bitsclr(Rs32,#u6) C Intrinsic Prototype: Byte Q6_p_bitsclr_RI(Word32 Rs, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_bitsclr_RI __builtin_HEXAGON_C2_bitsclri /* ========================================================================== Assembly Syntax: Pd4=bitsset(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_bitsset_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_bitsset_RR __builtin_HEXAGON_C2_bitsset /* 
========================================================================== Assembly Syntax: Pd4=cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_eq_RR __builtin_HEXAGON_C2_cmpeq /* ========================================================================== Assembly Syntax: Pd4=cmp.eq(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_cmp_eq_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_eq_RI __builtin_HEXAGON_C2_cmpeqi /* ========================================================================== Assembly Syntax: Pd4=cmp.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_cmp_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmp_eq_PP __builtin_HEXAGON_C2_cmpeqp /* ========================================================================== Assembly Syntax: Pd4=cmp.ge(Rs32,#s8) C Intrinsic Prototype: Byte Q6_p_cmp_ge_RI(Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_ge_RI __builtin_HEXAGON_C2_cmpgei /* ========================================================================== Assembly Syntax: Pd4=cmp.geu(Rs32,#u8) C Intrinsic Prototype: Byte Q6_p_cmp_geu_RI(Word32 Rs, Word32 Iu8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_geu_RI __builtin_HEXAGON_C2_cmpgeui /* ========================================================================== Assembly Syntax: Pd4=cmp.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_gt_RR(Word32 Rs, 
Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gt_RR __builtin_HEXAGON_C2_cmpgt /* ========================================================================== Assembly Syntax: Pd4=cmp.gt(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_cmp_gt_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gt_RI __builtin_HEXAGON_C2_cmpgti /* ========================================================================== Assembly Syntax: Pd4=cmp.gt(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_cmp_gt_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmp_gt_PP __builtin_HEXAGON_C2_cmpgtp /* ========================================================================== Assembly Syntax: Pd4=cmp.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gtu_RR __builtin_HEXAGON_C2_cmpgtu /* ========================================================================== Assembly Syntax: Pd4=cmp.gtu(Rs32,#u9) C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RI(Word32 Rs, Word32 Iu9) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_gtu_RI __builtin_HEXAGON_C2_cmpgtui /* ========================================================================== Assembly Syntax: Pd4=cmp.gtu(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_cmp_gtu_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_cmp_gtu_PP 
__builtin_HEXAGON_C2_cmpgtup /* ========================================================================== Assembly Syntax: Pd4=cmp.lt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_lt_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_lt_RR __builtin_HEXAGON_C2_cmplt /* ========================================================================== Assembly Syntax: Pd4=cmp.ltu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_cmp_ltu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_cmp_ltu_RR __builtin_HEXAGON_C2_cmpltu /* ========================================================================== Assembly Syntax: Rdd32=mask(Pt4) C Intrinsic Prototype: Word64 Q6_P_mask_p(Byte Pt) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mask_p __builtin_HEXAGON_C2_mask /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mux_pRR(Byte Pu, Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pRR __builtin_HEXAGON_C2_mux /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,#s8,#S8) C Intrinsic Prototype: Word32 Q6_R_mux_pII(Byte Pu, Word32 Is8, Word32 IS8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pII __builtin_HEXAGON_C2_muxii /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,Rs32,#s8) C Intrinsic Prototype: Word32 
Q6_R_mux_pRI(Byte Pu, Word32 Rs, Word32 Is8) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pRI __builtin_HEXAGON_C2_muxir /* ========================================================================== Assembly Syntax: Rd32=mux(Pu4,#s8,Rs32) C Intrinsic Prototype: Word32 Q6_R_mux_pIR(Byte Pu, Word32 Is8, Word32 Rs) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mux_pIR __builtin_HEXAGON_C2_muxri /* ========================================================================== Assembly Syntax: Pd4=not(Ps4) C Intrinsic Prototype: Byte Q6_p_not_p(Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_p __builtin_HEXAGON_C2_not /* ========================================================================== Assembly Syntax: Pd4=or(Pt4,Ps4) C Intrinsic Prototype: Byte Q6_p_or_pp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_pp __builtin_HEXAGON_C2_or /* ========================================================================== Assembly Syntax: Pd4=or(Pt4,!Ps4) C Intrinsic Prototype: Byte Q6_p_or_pnp(Byte Pt, Byte Ps) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_pnp __builtin_HEXAGON_C2_orn /* ========================================================================== Assembly Syntax: Pd4=Ps4 C Intrinsic Prototype: Byte Q6_p_equals_p(Byte Ps) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_equals_p __builtin_HEXAGON_C2_pxfer_map /* 
========================================================================== Assembly Syntax: Rd32=Ps4 C Intrinsic Prototype: Word32 Q6_R_equals_p(Byte Ps) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_equals_p __builtin_HEXAGON_C2_tfrpr /* ========================================================================== Assembly Syntax: Pd4=Rs32 C Intrinsic Prototype: Byte Q6_p_equals_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_equals_R __builtin_HEXAGON_C2_tfrrp /* ========================================================================== Assembly Syntax: Rd32=vitpack(Ps4,Pt4) C Intrinsic Prototype: Word32 Q6_R_vitpack_pp(Byte Ps, Byte Pt) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vitpack_pp __builtin_HEXAGON_C2_vitpack /* ========================================================================== Assembly Syntax: Rdd32=vmux(Pu4,Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vmux_pPP(Byte Pu, Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmux_pPP __builtin_HEXAGON_C2_vmux /* ========================================================================== Assembly Syntax: Pd4=xor(Ps4,Pt4) C Intrinsic Prototype: Byte Q6_p_xor_pp(Byte Ps, Byte Pt) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_xor_pp __builtin_HEXAGON_C2_xor /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,and(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_and_and_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 
========================================================================== */ #define Q6_p_and_and_ppp __builtin_HEXAGON_C4_and_and /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,and(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_and_and_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_and_ppnp __builtin_HEXAGON_C4_and_andn /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,or(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_and_or_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_or_ppp __builtin_HEXAGON_C4_and_or /* ========================================================================== Assembly Syntax: Pd4=and(Ps4,or(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_and_or_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_and_or_ppnp __builtin_HEXAGON_C4_and_orn /* ========================================================================== Assembly Syntax: Pd4=!cmp.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gt_RR __builtin_HEXAGON_C4_cmplte /* ========================================================================== Assembly Syntax: Pd4=!cmp.gt(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gt_RI __builtin_HEXAGON_C4_cmpltei /* 
========================================================================== Assembly Syntax: Pd4=!cmp.gtu(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gtu_RR __builtin_HEXAGON_C4_cmplteu /* ========================================================================== Assembly Syntax: Pd4=!cmp.gtu(Rs32,#u9) C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RI(Word32 Rs, Word32 Iu9) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_gtu_RI __builtin_HEXAGON_C4_cmplteui /* ========================================================================== Assembly Syntax: Pd4=!cmp.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_eq_RR __builtin_HEXAGON_C4_cmpneq /* ========================================================================== Assembly Syntax: Pd4=!cmp.eq(Rs32,#s10) C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RI(Word32 Rs, Word32 Is10) Instruction Type: ALU32_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_p_not_cmp_eq_RI __builtin_HEXAGON_C4_cmpneqi /* ========================================================================== Assembly Syntax: Pd4=fastcorner9(Ps4,Pt4) C Intrinsic Prototype: Byte Q6_p_fastcorner9_pp(Byte Ps, Byte Pt) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9 /* ========================================================================== Assembly Syntax: Pd4=!fastcorner9(Ps4,Pt4) C 
Intrinsic Prototype: Byte Q6_p_not_fastcorner9_pp(Byte Ps, Byte Pt) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9_not /* ========================================================================== Assembly Syntax: Pd4=!bitsclr(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_bitsclr_RR __builtin_HEXAGON_C4_nbitsclr /* ========================================================================== Assembly Syntax: Pd4=!bitsclr(Rs32,#u6) C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RI(Word32 Rs, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_bitsclr_RI __builtin_HEXAGON_C4_nbitsclri /* ========================================================================== Assembly Syntax: Pd4=!bitsset(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_bitsset_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_bitsset_RR __builtin_HEXAGON_C4_nbitsset /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,and(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_or_and_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_and_ppp __builtin_HEXAGON_C4_or_and /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,and(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_or_and_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 
========================================================================== */ #define Q6_p_or_and_ppnp __builtin_HEXAGON_C4_or_andn /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,or(Pt4,Pu4)) C Intrinsic Prototype: Byte Q6_p_or_or_ppp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_or_ppp __builtin_HEXAGON_C4_or_or /* ========================================================================== Assembly Syntax: Pd4=or(Ps4,or(Pt4,!Pu4)) C Intrinsic Prototype: Byte Q6_p_or_or_ppnp(Byte Ps, Byte Pt, Byte Pu) Instruction Type: CR Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_or_or_ppnp __builtin_HEXAGON_C4_or_orn /* ========================================================================== Assembly Syntax: Rdd32=convert_d2df(Rss32) C Intrinsic Prototype: Float64 Q6_P_convert_d2df_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_d2df_P __builtin_HEXAGON_F2_conv_d2df /* ========================================================================== Assembly Syntax: Rd32=convert_d2sf(Rss32) C Intrinsic Prototype: Float32 Q6_R_convert_d2sf_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_d2sf_P __builtin_HEXAGON_F2_conv_d2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_df2d(Rss32) C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2d_P __builtin_HEXAGON_F2_conv_df2d /* 
========================================================================== Assembly Syntax: Rdd32=convert_df2d(Rss32):chop C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2d_P_chop __builtin_HEXAGON_F2_conv_df2d_chop /* ========================================================================== Assembly Syntax: Rd32=convert_df2sf(Rss32) C Intrinsic Prototype: Float32 Q6_R_convert_df2sf_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2sf_P __builtin_HEXAGON_F2_conv_df2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_df2ud(Rss32) C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2ud_P __builtin_HEXAGON_F2_conv_df2ud /* ========================================================================== Assembly Syntax: Rdd32=convert_df2ud(Rss32):chop C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_df2ud_P_chop __builtin_HEXAGON_F2_conv_df2ud_chop /* ========================================================================== Assembly Syntax: Rd32=convert_df2uw(Rss32) C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2uw_P __builtin_HEXAGON_F2_conv_df2uw /* ========================================================================== Assembly Syntax: 
Rd32=convert_df2uw(Rss32):chop C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2uw_P_chop __builtin_HEXAGON_F2_conv_df2uw_chop /* ========================================================================== Assembly Syntax: Rd32=convert_df2w(Rss32) C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2w_P __builtin_HEXAGON_F2_conv_df2w /* ========================================================================== Assembly Syntax: Rd32=convert_df2w(Rss32):chop C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P_chop(Float64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_df2w_P_chop __builtin_HEXAGON_F2_conv_df2w_chop /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2d(Rs32) C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2d_R __builtin_HEXAGON_F2_conv_sf2d /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2d(Rs32):chop C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2d_R_chop __builtin_HEXAGON_F2_conv_sf2d_chop /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2df(Rs32) C Intrinsic Prototype: Float64 Q6_P_convert_sf2df_R(Float32 Rs) 
Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2df_R __builtin_HEXAGON_F2_conv_sf2df /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2ud(Rs32) C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2ud_R __builtin_HEXAGON_F2_conv_sf2ud /* ========================================================================== Assembly Syntax: Rdd32=convert_sf2ud(Rs32):chop C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_sf2ud_R_chop __builtin_HEXAGON_F2_conv_sf2ud_chop /* ========================================================================== Assembly Syntax: Rd32=convert_sf2uw(Rs32) C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2uw_R __builtin_HEXAGON_F2_conv_sf2uw /* ========================================================================== Assembly Syntax: Rd32=convert_sf2uw(Rs32):chop C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2uw_R_chop __builtin_HEXAGON_F2_conv_sf2uw_chop /* ========================================================================== Assembly Syntax: Rd32=convert_sf2w(Rs32) C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_convert_sf2w_R __builtin_HEXAGON_F2_conv_sf2w /* ========================================================================== Assembly Syntax: Rd32=convert_sf2w(Rs32):chop C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R_chop(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_sf2w_R_chop __builtin_HEXAGON_F2_conv_sf2w_chop /* ========================================================================== Assembly Syntax: Rdd32=convert_ud2df(Rss32) C Intrinsic Prototype: Float64 Q6_P_convert_ud2df_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_ud2df_P __builtin_HEXAGON_F2_conv_ud2df /* ========================================================================== Assembly Syntax: Rd32=convert_ud2sf(Rss32) C Intrinsic Prototype: Float32 Q6_R_convert_ud2sf_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_ud2sf_P __builtin_HEXAGON_F2_conv_ud2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_uw2df(Rs32) C Intrinsic Prototype: Float64 Q6_P_convert_uw2df_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_uw2df_R __builtin_HEXAGON_F2_conv_uw2df /* ========================================================================== Assembly Syntax: Rd32=convert_uw2sf(Rs32) C Intrinsic Prototype: Float32 Q6_R_convert_uw2sf_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_uw2sf_R 
__builtin_HEXAGON_F2_conv_uw2sf /* ========================================================================== Assembly Syntax: Rdd32=convert_w2df(Rs32) C Intrinsic Prototype: Float64 Q6_P_convert_w2df_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_convert_w2df_R __builtin_HEXAGON_F2_conv_w2df /* ========================================================================== Assembly Syntax: Rd32=convert_w2sf(Rs32) C Intrinsic Prototype: Float32 Q6_R_convert_w2sf_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_convert_w2sf_R __builtin_HEXAGON_F2_conv_w2sf /* ========================================================================== Assembly Syntax: Pd4=dfclass(Rss32,#u5) C Intrinsic Prototype: Byte Q6_p_dfclass_PI(Float64 Rss, Word32 Iu5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfclass_PI __builtin_HEXAGON_F2_dfclass /* ========================================================================== Assembly Syntax: Pd4=dfcmp.eq(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_eq_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_eq_PP __builtin_HEXAGON_F2_dfcmpeq /* ========================================================================== Assembly Syntax: Pd4=dfcmp.ge(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_ge_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_ge_PP __builtin_HEXAGON_F2_dfcmpge /* ========================================================================== Assembly Syntax: Pd4=dfcmp.gt(Rss32,Rtt32) C 
Intrinsic Prototype: Byte Q6_p_dfcmp_gt_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_gt_PP __builtin_HEXAGON_F2_dfcmpgt /* ========================================================================== Assembly Syntax: Pd4=dfcmp.uo(Rss32,Rtt32) C Intrinsic Prototype: Byte Q6_p_dfcmp_uo_PP(Float64 Rss, Float64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_dfcmp_uo_PP __builtin_HEXAGON_F2_dfcmpuo /* ========================================================================== Assembly Syntax: Rdd32=dfmake(#u10):neg C Intrinsic Prototype: Float64 Q6_P_dfmake_I_neg(Word32 Iu10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmake_I_neg __builtin_HEXAGON_F2_dfimm_n /* ========================================================================== Assembly Syntax: Rdd32=dfmake(#u10):pos C Intrinsic Prototype: Float64 Q6_P_dfmake_I_pos(Word32 Iu10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmake_I_pos __builtin_HEXAGON_F2_dfimm_p /* ========================================================================== Assembly Syntax: Rd32=sfadd(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfadd_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfadd_RR __builtin_HEXAGON_F2_sfadd /* ========================================================================== Assembly Syntax: Pd4=sfclass(Rs32,#u5) C Intrinsic Prototype: Byte Q6_p_sfclass_RI(Float32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 
========================================================================== */ #define Q6_p_sfclass_RI __builtin_HEXAGON_F2_sfclass /* ========================================================================== Assembly Syntax: Pd4=sfcmp.eq(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_eq_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_eq_RR __builtin_HEXAGON_F2_sfcmpeq /* ========================================================================== Assembly Syntax: Pd4=sfcmp.ge(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_ge_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_ge_RR __builtin_HEXAGON_F2_sfcmpge /* ========================================================================== Assembly Syntax: Pd4=sfcmp.gt(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_gt_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_gt_RR __builtin_HEXAGON_F2_sfcmpgt /* ========================================================================== Assembly Syntax: Pd4=sfcmp.uo(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_sfcmp_uo_RR(Float32 Rs, Float32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_sfcmp_uo_RR __builtin_HEXAGON_F2_sfcmpuo /* ========================================================================== Assembly Syntax: Rd32=sffixupd(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sffixupd_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sffixupd_RR __builtin_HEXAGON_F2_sffixupd /* 
========================================================================== Assembly Syntax: Rd32=sffixupn(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sffixupn_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sffixupn_RR __builtin_HEXAGON_F2_sffixupn /* ========================================================================== Assembly Syntax: Rd32=sffixupr(Rs32) C Intrinsic Prototype: Float32 Q6_R_sffixupr_R(Float32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sffixupr_R __builtin_HEXAGON_F2_sffixupr /* ========================================================================== Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR(Float32 Rx, Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpyacc_RR __builtin_HEXAGON_F2_sffma /* ========================================================================== Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32):lib C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpyacc_RR_lib __builtin_HEXAGON_F2_sffma_lib /* ========================================================================== Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32,Pu4):scale C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RRp_scale(Float32 Rx, Float32 Rs, Float32 Rt, Byte Pu) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpyacc_RRp_scale __builtin_HEXAGON_F2_sffma_sc /* ========================================================================== Assembly Syntax: 
Rx32-=sfmpy(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR(Float32 Rx, Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpynac_RR __builtin_HEXAGON_F2_sffms /* ========================================================================== Assembly Syntax: Rx32-=sfmpy(Rs32,Rt32):lib C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpynac_RR_lib __builtin_HEXAGON_F2_sffms_lib /* ========================================================================== Assembly Syntax: Rd32=sfmake(#u10):neg C Intrinsic Prototype: Float32 Q6_R_sfmake_I_neg(Word32 Iu10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmake_I_neg __builtin_HEXAGON_F2_sfimm_n /* ========================================================================== Assembly Syntax: Rd32=sfmake(#u10):pos C Intrinsic Prototype: Float32 Q6_R_sfmake_I_pos(Word32 Iu10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmake_I_pos __builtin_HEXAGON_F2_sfimm_p /* ========================================================================== Assembly Syntax: Rd32=sfmax(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfmax_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmax_RR __builtin_HEXAGON_F2_sfmax /* ========================================================================== Assembly Syntax: Rd32=sfmin(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfmin_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_sfmin_RR __builtin_HEXAGON_F2_sfmin /* ========================================================================== Assembly Syntax: Rd32=sfmpy(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfmpy_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfmpy_RR __builtin_HEXAGON_F2_sfmpy /* ========================================================================== Assembly Syntax: Rd32=sfsub(Rs32,Rt32) C Intrinsic Prototype: Float32 Q6_R_sfsub_RR(Float32 Rs, Float32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sfsub_RR __builtin_HEXAGON_F2_sfsub /* ========================================================================== Assembly Syntax: Rd32=memb(Rx32++#s4:0:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memb_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memb_IM_circ __builtin_HEXAGON_L2_loadrb_pci /* ========================================================================== Assembly Syntax: Rd32=memb(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memb_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memb_M_circ __builtin_HEXAGON_L2_loadrb_pcr /* ========================================================================== Assembly Syntax: Rdd32=memd(Rx32++#s4:3:circ(Mu2)) C Intrinsic Prototype: Word64 Q6_P_memd_IM_circ(void** Rx, Word32 Is4_3, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ 
#define Q6_P_memd_IM_circ __builtin_HEXAGON_L2_loadrd_pci /* ========================================================================== Assembly Syntax: Rdd32=memd(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word64 Q6_P_memd_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_P_memd_M_circ __builtin_HEXAGON_L2_loadrd_pcr /* ========================================================================== Assembly Syntax: Rd32=memh(Rx32++#s4:1:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memh_IM_circ __builtin_HEXAGON_L2_loadrh_pci /* ========================================================================== Assembly Syntax: Rd32=memh(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memh_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memh_M_circ __builtin_HEXAGON_L2_loadrh_pcr /* ========================================================================== Assembly Syntax: Rd32=memw(Rx32++#s4:2:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memw_IM_circ(void** Rx, Word32 Is4_2, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memw_IM_circ __builtin_HEXAGON_L2_loadri_pci /* ========================================================================== Assembly Syntax: Rd32=memw(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memw_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== 
*/ #define Q6_R_memw_M_circ __builtin_HEXAGON_L2_loadri_pcr /* ========================================================================== Assembly Syntax: Rd32=memub(Rx32++#s4:0:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memub_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memub_IM_circ __builtin_HEXAGON_L2_loadrub_pci /* ========================================================================== Assembly Syntax: Rd32=memub(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memub_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memub_M_circ __builtin_HEXAGON_L2_loadrub_pcr /* ========================================================================== Assembly Syntax: Rd32=memuh(Rx32++#s4:1:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memuh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memuh_IM_circ __builtin_HEXAGON_L2_loadruh_pci /* ========================================================================== Assembly Syntax: Rd32=memuh(Rx32++I:circ(Mu2)) C Intrinsic Prototype: Word32 Q6_R_memuh_M_circ(void** Rx, Word32 Mu, void* BaseAddress) Instruction Type: LD Execution Slots: SLOT01 ========================================================================== */ #define Q6_R_memuh_M_circ __builtin_HEXAGON_L2_loadruh_pcr /* ========================================================================== Assembly Syntax: Rx32+=add(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_addacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ 
#define Q6_R_addacc_RR __builtin_HEXAGON_M2_acci /* ========================================================================== Assembly Syntax: Rx32+=add(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_addacc_RI(Word32 Rx, Word32 Rs, Word32 Is8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addacc_RI __builtin_HEXAGON_M2_accii /* ========================================================================== Assembly Syntax: Rxx32+=cmpyi(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyiacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyiacc_RR __builtin_HEXAGON_M2_cmaci_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpyr(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyracc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyracc_RR __builtin_HEXAGON_M2_cmacr_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_sat __builtin_HEXAGON_M2_cmacs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_s1_sat __builtin_HEXAGON_M2_cmacs_s1 /* 
========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32*):sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_conj_sat __builtin_HEXAGON_M2_cmacsc_s0 /* ========================================================================== Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32*):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyacc_RR_conj_s1_sat __builtin_HEXAGON_M2_cmacsc_s1 /* ========================================================================== Assembly Syntax: Rdd32=cmpyi(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyi_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyi_RR __builtin_HEXAGON_M2_cmpyi_s0 /* ========================================================================== Assembly Syntax: Rdd32=cmpyr(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_cmpyr_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpyr_RR __builtin_HEXAGON_M2_cmpyr_s0 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32):rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s0 /* ========================================================================== Assembly Syntax: 
Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s1 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32*):rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_conj_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s0 /* ========================================================================== Assembly Syntax: Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpy_RR_conj_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s1 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_sat __builtin_HEXAGON_M2_cmpys_s0 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_s1_sat __builtin_HEXAGON_M2_cmpys_s1 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32*):sat C Intrinsic Prototype: Word64 
Q6_P_cmpy_RR_conj_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_conj_sat __builtin_HEXAGON_M2_cmpysc_s0 /* ========================================================================== Assembly Syntax: Rdd32=cmpy(Rs32,Rt32*):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpy_RR_conj_s1_sat __builtin_HEXAGON_M2_cmpysc_s1 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_sat __builtin_HEXAGON_M2_cnacs_s0 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_s1_sat __builtin_HEXAGON_M2_cnacs_s1 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32*):sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_conj_sat __builtin_HEXAGON_M2_cnacsc_s0 /* ========================================================================== Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32*):<<1:sat C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_s1_sat(Word64 Rxx, Word32 
Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cmpynac_RR_conj_s1_sat __builtin_HEXAGON_M2_cnacsc_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RR __builtin_HEXAGON_M2_dpmpyss_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpynac_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RR __builtin_HEXAGON_M2_dpmpyss_nac_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RR_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR_rnd __builtin_HEXAGON_M2_dpmpyss_rnd_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpy_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RR __builtin_HEXAGON_M2_dpmpyss_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== 
*/ #define Q6_P_mpyuacc_RR __builtin_HEXAGON_M2_dpmpyuu_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RR __builtin_HEXAGON_M2_dpmpyuu_nac_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32,Rt32) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RR __builtin_HEXAGON_M2_dpmpyuu_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRh_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyh_rs1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRh_s1_sat __builtin_HEXAGON_M2_hmmpyh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.l):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRl_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyl_rs1 /* 
========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RRl_s1_sat __builtin_HEXAGON_M2_hmmpyl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyi(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyiacc_RR __builtin_HEXAGON_M2_maci /* ========================================================================== Assembly Syntax: Rx32-=mpyi(Rs32,#u8) C Intrinsic Prototype: Word32 Q6_R_mpyinac_RI(Word32 Rx, Word32 Rs, Word32 Iu8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyinac_RI __builtin_HEXAGON_M2_macsin /* ========================================================================== Assembly Syntax: Rx32+=mpyi(Rs32,#u8) C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RI(Word32 Rx, Word32 Rs, Word32 Iu8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyiacc_RI __builtin_HEXAGON_M2_macsip /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_rnd_sat __builtin_HEXAGON_M2_mmachs_rs0 /* ========================================================================== Assembly Syntax: 
Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmachs_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_sat __builtin_HEXAGON_M2_mmachs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywohacc_PP_s1_sat __builtin_HEXAGON_M2_mmachs_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacls_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacls_rs1 /* 
========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_sat __builtin_HEXAGON_M2_mmacls_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywehacc_PP_s1_sat __builtin_HEXAGON_M2_mmacls_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_P_vmpywouhacc_PP_sat __builtin_HEXAGON_M2_mmacuhs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouhacc_PP_s1_sat __builtin_HEXAGON_M2_mmacuhs_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuhacc_PP_sat __builtin_HEXAGON_M2_mmaculs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vmpyweuhacc_PP_s1_sat __builtin_HEXAGON_M2_mmaculs_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_sat __builtin_HEXAGON_M2_mmpyh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywoh_PP_s1_sat __builtin_HEXAGON_M2_mmpyh_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vmpyweh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_sat __builtin_HEXAGON_M2_mmpyl_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweh_PP_s1_sat __builtin_HEXAGON_M2_mmpyl_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vmpywouh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_sat __builtin_HEXAGON_M2_mmpyuh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpywouh_PP_s1_sat __builtin_HEXAGON_M2_mmpyuh_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vmpyweuh_PP_sat __builtin_HEXAGON_M2_mmpyul_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyweuh_PP_s1_sat __builtin_HEXAGON_M2_mmpyul_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRh __builtin_HEXAGON_M2_mpy_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpy_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRl __builtin_HEXAGON_M2_mpy_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpy_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRh __builtin_HEXAGON_M2_mpy_acc_lh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpy_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRl __builtin_HEXAGON_M2_mpy_acc_ll_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpy_acc_ll_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_mpyacc_RhRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh __builtin_HEXAGON_M2_mpy_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpy_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_mpy_RhRl __builtin_HEXAGON_M2_mpy_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpy_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh __builtin_HEXAGON_M2_mpy_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpy_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl __builtin_HEXAGON_M2_mpy_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpy_ll_s1 /* 
========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh __builtin_HEXAGON_M2_mpy_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpy_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl __builtin_HEXAGON_M2_mpy_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpy_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh __builtin_HEXAGON_M2_mpy_nac_lh_s0 /* 
========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpy_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl __builtin_HEXAGON_M2_mpy_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpy_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s1 /* 
========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl_sat 
__builtin_HEXAGON_M2_mpy_nac_sat_ll_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s1 /* 
========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_sat __builtin_HEXAGON_M2_mpy_sat_hh_s0 /* ========================================================================== Assembly Syntax: 
Rd32=mpy(Rs32.h,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_sat __builtin_HEXAGON_M2_mpy_sat_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_sat __builtin_HEXAGON_M2_mpy_sat_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_sat(Word32 Rs, 
Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_sat __builtin_HEXAGON_M2_mpy_sat_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RhRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RlRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpy_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_mpy_RR __builtin_HEXAGON_M2_mpy_up /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32):<<1 C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR_s1 __builtin_HEXAGON_M2_mpy_up_s1 /* ========================================================================== Assembly Syntax: Rd32=mpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpy_RR_s1_sat __builtin_HEXAGON_M2_mpy_up_s1_sat /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRh __builtin_HEXAGON_M2_mpyd_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpyd_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRl 
__builtin_HEXAGON_M2_mpyd_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpyd_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRh __builtin_HEXAGON_M2_mpyd_acc_lh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpyd_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRl __builtin_HEXAGON_M2_mpyd_acc_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpyd_acc_ll_s1 /* 
========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh __builtin_HEXAGON_M2_mpyd_hh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpyd_hh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl __builtin_HEXAGON_M2_mpyd_hl_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpyd_hl_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh __builtin_HEXAGON_M2_mpyd_lh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 
Q6_P_mpy_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpyd_lh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl __builtin_HEXAGON_M2_mpyd_ll_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpyd_ll_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRh __builtin_HEXAGON_M2_mpyd_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpyd_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_mpynac_RhRl __builtin_HEXAGON_M2_mpyd_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpyd_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRh __builtin_HEXAGON_M2_mpyd_nac_lh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpyd_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpynac_RlRl __builtin_HEXAGON_M2_mpyd_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpyd_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_P_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):<<1:rnd C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyi(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyi_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyi_RR __builtin_HEXAGON_M2_mpyi /* ========================================================================== Assembly Syntax: Rd32=mpyi(Rs32,#m9) C Intrinsic Prototype: Word32 Q6_R_mpyi_RI(Word32 Rs, Word32 Im9) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mpyi_RI __builtin_HEXAGON_M2_mpysmi /* ========================================================================== Assembly 
Syntax: Rd32=mpysu(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpysu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpysu_RR __builtin_HEXAGON_M2_mpysu_up /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyu_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyu_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyu_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyu_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 
Q6_R_mpyuacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyu_acc_lh_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyu_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyu_acc_ll_s0 /* ========================================================================== Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyu_acc_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRh __builtin_HEXAGON_M2_mpyu_hh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyu_hh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRl __builtin_HEXAGON_M2_mpyu_hl_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyu_hl_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRh __builtin_HEXAGON_M2_mpyu_lh_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyu_lh_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRl 
__builtin_HEXAGON_M2_mpyu_ll_s0 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyu_ll_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRh __builtin_HEXAGON_M2_mpyu_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyu_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRl __builtin_HEXAGON_M2_mpyu_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyu_nac_hl_s1 /* 
========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRh __builtin_HEXAGON_M2_mpyu_nac_lh_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyu_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRl __builtin_HEXAGON_M2_mpyu_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyu_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyu(Rs32,Rt32) C Intrinsic Prototype: UWord32 Q6_R_mpyu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyu_RR __builtin_HEXAGON_M2_mpyu_up /* ========================================================================== 
Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyud_acc_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyud_acc_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyud_acc_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyud_acc_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyud_acc_lh_s0 /* ========================================================================== Assembly Syntax: 
Rxx32+=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyud_acc_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyud_acc_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyud_acc_ll_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRh __builtin_HEXAGON_M2_mpyud_hh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyud_hh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: UWord64 
Q6_P_mpyu_RhRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRl __builtin_HEXAGON_M2_mpyud_hl_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyud_hl_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRh __builtin_HEXAGON_M2_mpyud_lh_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyud_lh_s1 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyu_RlRl __builtin_HEXAGON_M2_mpyud_ll_s0 /* ========================================================================== Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl_s1(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyud_ll_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRh __builtin_HEXAGON_M2_mpyud_nac_hh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyud_nac_hh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRl __builtin_HEXAGON_M2_mpyud_nac_hl_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyud_nac_hl_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.h) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_mpyunac_RlRh __builtin_HEXAGON_M2_mpyud_nac_lh_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.h):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyud_nac_lh_s1 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.l) C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRl __builtin_HEXAGON_M2_mpyud_nac_ll_s0 /* ========================================================================== Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.l):<<1 C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyud_nac_ll_s1 /* ========================================================================== Assembly Syntax: Rd32=mpyui(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyui_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_mpyui_RR __builtin_HEXAGON_M2_mpyui /* ========================================================================== Assembly Syntax: Rx32-=add(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_addnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ 
#define Q6_R_addnac_RR __builtin_HEXAGON_M2_nacci /* ========================================================================== Assembly Syntax: Rx32-=add(Rs32,#s8) C Intrinsic Prototype: Word32 Q6_R_addnac_RI(Word32 Rx, Word32 Rs, Word32 Is8) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addnac_RI __builtin_HEXAGON_M2_naccii /* ========================================================================== Assembly Syntax: Rx32+=sub(Rt32,Rs32) C Intrinsic Prototype: Word32 Q6_R_subacc_RR(Word32 Rx, Word32 Rt, Word32 Rs) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_subacc_RR __builtin_HEXAGON_M2_subacc /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffh_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffh_PP __builtin_HEXAGON_M2_vabsdiffh /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffw(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffw_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffw_PP __builtin_HEXAGON_M2_vabsdiffw /* ========================================================================== Assembly Syntax: Rxx32+=vcmpyi(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyiacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyiacc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_i /* 
========================================================================== Assembly Syntax: Rxx32+=vcmpyr(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyracc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyracc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_r /* ========================================================================== Assembly Syntax: Rdd32=vcmpyi(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyi_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_i /* ========================================================================== Assembly Syntax: Rdd32=vcmpyr(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyr_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_r /* ========================================================================== Assembly Syntax: Rdd32=vcmpyi(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyi_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_i /* ========================================================================== Assembly Syntax: Rdd32=vcmpyr(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcmpyr_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_r /* 
========================================================================== Assembly Syntax: Rxx32+=vdmpy(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpyacc_PP_sat __builtin_HEXAGON_M2_vdmacs_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpyacc_PP_s1_sat __builtin_HEXAGON_M2_vdmacs_s1 /* ========================================================================== Assembly Syntax: Rd32=vdmpy(Rss32,Rtt32):rnd:sat C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vdmpy_PP_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s0 /* ========================================================================== Assembly Syntax: Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vdmpy_PP_s1_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s1 /* ========================================================================== Assembly Syntax: Rdd32=vdmpy(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpy_PP_sat __builtin_HEXAGON_M2_vdmpys_s0 /* 
========================================================================== Assembly Syntax: Rdd32=vdmpy(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpy_PP_s1_sat __builtin_HEXAGON_M2_vdmpys_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhacc_RR __builtin_HEXAGON_M2_vmac2 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyehacc_PP __builtin_HEXAGON_M2_vmac2es /* ========================================================================== Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyehacc_PP_sat __builtin_HEXAGON_M2_vmac2es_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyehacc_PP_s1_sat __builtin_HEXAGON_M2_vmac2es_s1 /* 
========================================================================== Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhacc_RR_sat __builtin_HEXAGON_M2_vmac2s_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2s_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyhsu(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsuacc_RR_sat __builtin_HEXAGON_M2_vmac2su_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsuacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2su_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyeh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyeh_PP_sat __builtin_HEXAGON_M2_vmpy2es_s0 /* 
========================================================================== Assembly Syntax: Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyeh_PP_s1_sat __builtin_HEXAGON_M2_vmpy2es_s1 /* ========================================================================== Assembly Syntax: Rdd32=vmpyh(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyh_RR_sat __builtin_HEXAGON_M2_vmpy2s_s0 /* ========================================================================== Assembly Syntax: Rd32=vmpyh(Rs32,Rt32):rnd:sat C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vmpyh_RR_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s0pack /* ========================================================================== Assembly Syntax: Rdd32=vmpyh(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyh_RR_s1_sat __builtin_HEXAGON_M2_vmpy2s_s1 /* ========================================================================== Assembly Syntax: Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_s1_rnd_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vmpyh_RR_s1_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s1pack /* ========================================================================== 
Assembly Syntax: Rdd32=vmpyhsu(Rs32,Rt32):sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsu_RR_sat __builtin_HEXAGON_M2_vmpy2su_s0 /* ========================================================================== Assembly Syntax: Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_s1_sat(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpyhsu_RR_s1_sat __builtin_HEXAGON_M2_vmpy2su_s1 /* ========================================================================== Assembly Syntax: Rd32=vraddh(Rss32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_vraddh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vraddh_PP __builtin_HEXAGON_M2_vraddh /* ========================================================================== Assembly Syntax: Rd32=vradduh(Rss32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_vradduh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vradduh_PP __builtin_HEXAGON_M2_vradduh /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyi(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyiacc_PP __builtin_HEXAGON_M2_vrcmaci_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyi(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP_conj(Word64 Rxx, 
Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyiacc_PP_conj __builtin_HEXAGON_M2_vrcmaci_s0c /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyr(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyracc_PP __builtin_HEXAGON_M2_vrcmacr_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpyr(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyracc_PP_conj __builtin_HEXAGON_M2_vrcmacr_s0c /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyi(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyi_PP __builtin_HEXAGON_M2_vrcmpyi_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyi(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyi_PP_conj __builtin_HEXAGON_M2_vrcmpyi_s0c /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyr(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vrcmpyr_PP __builtin_HEXAGON_M2_vrcmpyr_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrcmpyr(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcmpyr_PP_conj __builtin_HEXAGON_M2_vrcmpyr_s0c /* ========================================================================== Assembly Syntax: Rxx32+=vrcmpys(Rss32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vrcmpysacc_PR_s1_sat(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vrcmpysacc_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_acc_s1 /* ========================================================================== Assembly Syntax: Rdd32=vrcmpys(Rss32,Rt32):<<1:sat C Intrinsic Prototype: Word64 Q6_P_vrcmpys_PR_s1_sat(Word64 Rss, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vrcmpys_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_s1 /* ========================================================================== Assembly Syntax: Rd32=vrcmpys(Rss32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_vrcmpys_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: M Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vrcmpys_PR_s1_rnd_sat __builtin_HEXAGON_M2_vrcmpys_s1rp /* ========================================================================== Assembly Syntax: Rxx32+=vrmpyh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpyhacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vrmpyhacc_PP __builtin_HEXAGON_M2_vrmac_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrmpyh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpyh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyh_PP __builtin_HEXAGON_M2_vrmpy_s0 /* ========================================================================== Assembly Syntax: Rx32^=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xorxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_xorxacc_RR __builtin_HEXAGON_M2_xor_xacc /* ========================================================================== Assembly Syntax: Rx32&=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_andand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andand_RR __builtin_HEXAGON_M4_and_and /* ========================================================================== Assembly Syntax: Rx32&=and(Rs32,~Rt32) C Intrinsic Prototype: Word32 Q6_R_andand_RnR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andand_RnR __builtin_HEXAGON_M4_and_andn /* ========================================================================== Assembly Syntax: Rx32&=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_orand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_orand_RR __builtin_HEXAGON_M4_and_or /* 
========================================================================== Assembly Syntax: Rx32&=xor(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_xorand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_xorand_RR __builtin_HEXAGON_M4_and_xor /* ========================================================================== Assembly Syntax: Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyiwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_wh /* ========================================================================== Assembly Syntax: Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyiwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_whc /* ========================================================================== Assembly Syntax: Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyrwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_wh /* ========================================================================== Assembly Syntax: Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cmpyrwh_PR_conj_s1_rnd_sat 
__builtin_HEXAGON_M4_cmpyr_whc /* ========================================================================== Assembly Syntax: Rx32+=mpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpyacc_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyacc_RR_s1_sat __builtin_HEXAGON_M4_mac_up_s1_sat /* ========================================================================== Assembly Syntax: Rd32=add(#u6,mpyi(Rs32,#U6)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRI(Word32 Iu6, Word32 Rs, Word32 IU6) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_IRI __builtin_HEXAGON_M4_mpyri_addi /* ========================================================================== Assembly Syntax: Rd32=add(Ru32,mpyi(Rs32,#u6)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRI(Word32 Ru, Word32 Rs, Word32 Iu6) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_RRI __builtin_HEXAGON_M4_mpyri_addr /* ========================================================================== Assembly Syntax: Rd32=add(Ru32,mpyi(#u6:2,Rs32)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RIR(Word32 Ru, Word32 Iu6_2, Word32 Rs) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_RIR __builtin_HEXAGON_M4_mpyri_addr_u2 /* ========================================================================== Assembly Syntax: Rd32=add(#u6,mpyi(Rs32,Rt32)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRR(Word32 Iu6, Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_IRR __builtin_HEXAGON_M4_mpyrr_addi 
/* ========================================================================== Assembly Syntax: Ry32=add(Ru32,mpyi(Ry32,Rs32)) C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRR(Word32 Ru, Word32 Ry, Word32 Rs) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_mpyi_RRR __builtin_HEXAGON_M4_mpyrr_addr /* ========================================================================== Assembly Syntax: Rx32-=mpy(Rs32,Rt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_mpynac_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpynac_RR_s1_sat __builtin_HEXAGON_M4_nac_up_s1_sat /* ========================================================================== Assembly Syntax: Rx32|=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_andor_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andor_RR __builtin_HEXAGON_M4_or_and /* ========================================================================== Assembly Syntax: Rx32|=and(Rs32,~Rt32) C Intrinsic Prototype: Word32 Q6_R_andor_RnR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andor_RnR __builtin_HEXAGON_M4_or_andn /* ========================================================================== Assembly Syntax: Rx32|=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_oror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_oror_RR __builtin_HEXAGON_M4_or_or /* ========================================================================== Assembly Syntax: Rx32|=xor(Rs32,Rt32) C 
Intrinsic Prototype: Word32 Q6_R_xoror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_xoror_RR __builtin_HEXAGON_M4_or_xor /* ========================================================================== Assembly Syntax: Rdd32=pmpyw(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_pmpyw_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_pmpyw_RR __builtin_HEXAGON_M4_pmpyw /* ========================================================================== Assembly Syntax: Rxx32^=pmpyw(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_pmpywxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_pmpywxacc_RR __builtin_HEXAGON_M4_pmpyw_acc /* ========================================================================== Assembly Syntax: Rdd32=vpmpyh(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vpmpyh_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vpmpyh_RR __builtin_HEXAGON_M4_vpmpyh /* ========================================================================== Assembly Syntax: Rxx32^=vpmpyh(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vpmpyhxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vpmpyhxacc_RR __builtin_HEXAGON_M4_vpmpyh_acc /* ========================================================================== Assembly Syntax: Rxx32+=vrmpyweh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vrmpywehacc_PP __builtin_HEXAGON_M4_vrmpyeh_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrmpyweh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywehacc_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_acc_s1 /* ========================================================================== Assembly Syntax: Rdd32=vrmpyweh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyweh_PP __builtin_HEXAGON_M4_vrmpyeh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrmpyweh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP_s1(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpyweh_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_s1 /* ========================================================================== Assembly Syntax: Rxx32+=vrmpywoh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywohacc_PP __builtin_HEXAGON_M4_vrmpyoh_acc_s0 /* ========================================================================== Assembly Syntax: Rxx32+=vrmpywoh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vrmpywohacc_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_acc_s1 /* ========================================================================== Assembly Syntax: Rdd32=vrmpywoh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywoh_PP __builtin_HEXAGON_M4_vrmpyoh_s0 /* ========================================================================== Assembly Syntax: Rdd32=vrmpywoh(Rss32,Rtt32):<<1 C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP_s1(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpywoh_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_s1 /* ========================================================================== Assembly Syntax: Rx32^=and(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_andxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andxacc_RR __builtin_HEXAGON_M4_xor_and /* ========================================================================== Assembly Syntax: Rx32^=and(Rs32,~Rt32) C Intrinsic Prototype: Word32 Q6_R_andxacc_RnR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andxacc_RnR __builtin_HEXAGON_M4_xor_andn /* ========================================================================== Assembly Syntax: Rx32^=or(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_orxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_orxacc_RR 
__builtin_HEXAGON_M4_xor_or /* ========================================================================== Assembly Syntax: Rxx32^=xor(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_xorxacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_xorxacc_PP __builtin_HEXAGON_M4_xor_xacc /* ========================================================================== Assembly Syntax: Rxx32+=vdmpybsu(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpybsuacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpybsuacc_PP_sat __builtin_HEXAGON_M5_vdmacbsu /* ========================================================================== Assembly Syntax: Rdd32=vdmpybsu(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vdmpybsu_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vdmpybsu_PP_sat __builtin_HEXAGON_M5_vdmpybsu /* ========================================================================== Assembly Syntax: Rxx32+=vmpybsu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybsuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybsuacc_RR __builtin_HEXAGON_M5_vmacbsu /* ========================================================================== Assembly Syntax: Rxx32+=vmpybu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybuacc_RR __builtin_HEXAGON_M5_vmacbuu /* 
========================================================================== Assembly Syntax: Rdd32=vmpybsu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybsu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybsu_RR __builtin_HEXAGON_M5_vmpybsu /* ========================================================================== Assembly Syntax: Rdd32=vmpybu(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vmpybu_RR(Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vmpybu_RR __builtin_HEXAGON_M5_vmpybuu /* ========================================================================== Assembly Syntax: Rxx32+=vrmpybsu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybsuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybsuacc_PP __builtin_HEXAGON_M5_vrmacbsu /* ========================================================================== Assembly Syntax: Rxx32+=vrmpybu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybuacc_PP __builtin_HEXAGON_M5_vrmacbuu /* ========================================================================== Assembly Syntax: Rdd32=vrmpybsu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vrmpybsu_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybsu_PP __builtin_HEXAGON_M5_vrmpybsu /* ========================================================================== Assembly Syntax: Rdd32=vrmpybu(Rss32,Rtt32) C 
Intrinsic Prototype: Word64 Q6_P_vrmpybu_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrmpybu_PP __builtin_HEXAGON_M5_vrmpybuu /* ========================================================================== Assembly Syntax: Rd32=addasl(Rt32,Rs32,#u3) C Intrinsic Prototype: Word32 Q6_R_addasl_RRI(Word32 Rt, Word32 Rs, Word32 Iu3) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_addasl_RRI __builtin_HEXAGON_S2_addasl_rrri /* ========================================================================== Assembly Syntax: Rdd32=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asl_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asl_PI __builtin_HEXAGON_S2_asl_i_p /* ========================================================================== Assembly Syntax: Rxx32+=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslacc_PI __builtin_HEXAGON_S2_asl_i_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asland_PI __builtin_HEXAGON_S2_asl_i_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_aslnac_PI __builtin_HEXAGON_S2_asl_i_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslor_PI __builtin_HEXAGON_S2_asl_i_p_or /* ========================================================================== Assembly Syntax: Rxx32^=asl(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_aslxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslxacc_PI __builtin_HEXAGON_S2_asl_i_p_xacc /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asl_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RI __builtin_HEXAGON_S2_asl_i_r /* ========================================================================== Assembly Syntax: Rx32+=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslacc_RI __builtin_HEXAGON_S2_asl_i_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asland_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asland_RI __builtin_HEXAGON_S2_asl_i_r_and 
/* ========================================================================== Assembly Syntax: Rx32-=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslnac_RI __builtin_HEXAGON_S2_asl_i_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslor_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslor_RI __builtin_HEXAGON_S2_asl_i_r_or /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,#u5):sat C Intrinsic Prototype: Word32 Q6_R_asl_RI_sat(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RI_sat __builtin_HEXAGON_S2_asl_i_r_sat /* ========================================================================== Assembly Syntax: Rx32^=asl(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_aslxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslxacc_RI __builtin_HEXAGON_S2_asl_i_r_xacc /* ========================================================================== Assembly Syntax: Rdd32=vaslh(Rss32,#u4) C Intrinsic Prototype: Word64 Q6_P_vaslh_PI(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslh_PI __builtin_HEXAGON_S2_asl_i_vh /* ========================================================================== Assembly Syntax: Rdd32=vaslw(Rss32,#u5) C Intrinsic 
Prototype: Word64 Q6_P_vaslw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslw_PI __builtin_HEXAGON_S2_asl_i_vw /* ========================================================================== Assembly Syntax: Rdd32=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asl_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asl_PR __builtin_HEXAGON_S2_asl_r_p /* ========================================================================== Assembly Syntax: Rxx32+=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslacc_PR __builtin_HEXAGON_S2_asl_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asland_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asland_PR __builtin_HEXAGON_S2_asl_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslnac_PR __builtin_HEXAGON_S2_asl_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_aslor_PR __builtin_HEXAGON_S2_asl_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=asl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_aslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_aslxacc_PR __builtin_HEXAGON_S2_asl_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asl_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RR __builtin_HEXAGON_S2_asl_r_r /* ========================================================================== Assembly Syntax: Rx32+=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_aslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslacc_RR __builtin_HEXAGON_S2_asl_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asland_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asland_RR __builtin_HEXAGON_S2_asl_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_aslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslnac_RR __builtin_HEXAGON_S2_asl_r_r_nac /* 
========================================================================== Assembly Syntax: Rx32|=asl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_aslor_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_aslor_RR __builtin_HEXAGON_S2_asl_r_r_or /* ========================================================================== Assembly Syntax: Rd32=asl(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_asl_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asl_RR_sat __builtin_HEXAGON_S2_asl_r_r_sat /* ========================================================================== Assembly Syntax: Rdd32=vaslh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vaslh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslh_PR __builtin_HEXAGON_S2_asl_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vaslw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vaslw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vaslw_PR __builtin_HEXAGON_S2_asl_r_vw /* ========================================================================== Assembly Syntax: Rdd32=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asr_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asr_PI __builtin_HEXAGON_S2_asr_i_p /* ========================================================================== Assembly Syntax: Rxx32+=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asracc_PI(Word64 Rxx, 
Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asracc_PI __builtin_HEXAGON_S2_asr_i_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrand_PI __builtin_HEXAGON_S2_asr_i_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrnac_PI __builtin_HEXAGON_S2_asr_i_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asror_PI __builtin_HEXAGON_S2_asr_i_p_or /* ========================================================================== Assembly Syntax: Rdd32=asr(Rss32,#u6):rnd C Intrinsic Prototype: Word64 Q6_P_asr_PI_rnd(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asr_PI_rnd __builtin_HEXAGON_S2_asr_i_p_rnd /* ========================================================================== Assembly Syntax: Rdd32=asrrnd(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_asrrnd_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_P_asrrnd_PI __builtin_HEXAGON_S2_asr_i_p_rnd_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asr_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RI __builtin_HEXAGON_S2_asr_i_r /* ========================================================================== Assembly Syntax: Rx32+=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asracc_RI __builtin_HEXAGON_S2_asr_i_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrand_RI __builtin_HEXAGON_S2_asr_i_r_and /* ========================================================================== Assembly Syntax: Rx32-=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrnac_RI __builtin_HEXAGON_S2_asr_i_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asror_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asror_RI __builtin_HEXAGON_S2_asr_i_r_or 
/* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,#u5):rnd C Intrinsic Prototype: Word32 Q6_R_asr_RI_rnd(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RI_rnd __builtin_HEXAGON_S2_asr_i_r_rnd /* ========================================================================== Assembly Syntax: Rd32=asrrnd(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_asrrnd_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_asrrnd_RI __builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=vasrw(Rss32,#u5) C Intrinsic Prototype: Word32 Q6_R_vasrw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vasrw_PI __builtin_HEXAGON_S2_asr_i_svw_trun /* ========================================================================== Assembly Syntax: Rdd32=vasrh(Rss32,#u4) C Intrinsic Prototype: Word64 Q6_P_vasrh_PI(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrh_PI __builtin_HEXAGON_S2_asr_i_vh /* ========================================================================== Assembly Syntax: Rdd32=vasrw(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vasrw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrw_PI __builtin_HEXAGON_S2_asr_i_vw /* ========================================================================== Assembly Syntax: Rdd32=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 
Q6_P_asr_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asr_PR __builtin_HEXAGON_S2_asr_r_p /* ========================================================================== Assembly Syntax: Rxx32+=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asracc_PR __builtin_HEXAGON_S2_asr_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrand_PR __builtin_HEXAGON_S2_asr_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asrnac_PR __builtin_HEXAGON_S2_asr_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asror_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_asror_PR __builtin_HEXAGON_S2_asr_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=asr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_asrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_asrxacc_PR __builtin_HEXAGON_S2_asr_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RR __builtin_HEXAGON_S2_asr_r_r /* ========================================================================== Assembly Syntax: Rx32+=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asracc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asracc_RR __builtin_HEXAGON_S2_asr_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asrand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrand_RR __builtin_HEXAGON_S2_asr_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asrnac_RR __builtin_HEXAGON_S2_asr_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=asr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_asror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asror_RR __builtin_HEXAGON_S2_asr_r_r_or /* 
========================================================================== Assembly Syntax: Rd32=asr(Rs32,Rt32):sat C Intrinsic Prototype: Word32 Q6_R_asr_RR_sat(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_asr_RR_sat __builtin_HEXAGON_S2_asr_r_r_sat /* ========================================================================== Assembly Syntax: Rd32=vasrw(Rss32,Rt32) C Intrinsic Prototype: Word32 Q6_R_vasrw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vasrw_PR __builtin_HEXAGON_S2_asr_r_svw_trun /* ========================================================================== Assembly Syntax: Rdd32=vasrh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vasrh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrh_PR __builtin_HEXAGON_S2_asr_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vasrw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vasrw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vasrw_PR __builtin_HEXAGON_S2_asr_r_vw /* ========================================================================== Assembly Syntax: Rd32=brev(Rs32) C Intrinsic Prototype: Word32 Q6_R_brev_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_brev_R __builtin_HEXAGON_S2_brev /* ========================================================================== Assembly Syntax: Rdd32=brev(Rss32) C Intrinsic Prototype: Word64 Q6_P_brev_P(Word64 Rss) Instruction Type: S_2op 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_brev_P __builtin_HEXAGON_S2_brevp /* ========================================================================== Assembly Syntax: Rd32=cl0(Rs32) C Intrinsic Prototype: Word32 Q6_R_cl0_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl0_R __builtin_HEXAGON_S2_cl0 /* ========================================================================== Assembly Syntax: Rd32=cl0(Rss32) C Intrinsic Prototype: Word32 Q6_R_cl0_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl0_P __builtin_HEXAGON_S2_cl0p /* ========================================================================== Assembly Syntax: Rd32=cl1(Rs32) C Intrinsic Prototype: Word32 Q6_R_cl1_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl1_R __builtin_HEXAGON_S2_cl1 /* ========================================================================== Assembly Syntax: Rd32=cl1(Rss32) C Intrinsic Prototype: Word32 Q6_R_cl1_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_cl1_P __builtin_HEXAGON_S2_cl1p /* ========================================================================== Assembly Syntax: Rd32=clb(Rs32) C Intrinsic Prototype: Word32 Q6_R_clb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clb_R __builtin_HEXAGON_S2_clb /* ========================================================================== Assembly Syntax: Rd32=normamt(Rs32) C Intrinsic Prototype: Word32 Q6_R_normamt_R(Word32 Rs) Instruction 
Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_normamt_R __builtin_HEXAGON_S2_clbnorm /* ========================================================================== Assembly Syntax: Rd32=clb(Rss32) C Intrinsic Prototype: Word32 Q6_R_clb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clb_P __builtin_HEXAGON_S2_clbp /* ========================================================================== Assembly Syntax: Rd32=clrbit(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_clrbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clrbit_RI __builtin_HEXAGON_S2_clrbit_i /* ========================================================================== Assembly Syntax: Rd32=clrbit(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_clrbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clrbit_RR __builtin_HEXAGON_S2_clrbit_r /* ========================================================================== Assembly Syntax: Rd32=ct0(Rs32) C Intrinsic Prototype: Word32 Q6_R_ct0_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct0_R __builtin_HEXAGON_S2_ct0 /* ========================================================================== Assembly Syntax: Rd32=ct0(Rss32) C Intrinsic Prototype: Word32 Q6_R_ct0_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct0_P __builtin_HEXAGON_S2_ct0p /* ========================================================================== Assembly Syntax: 
Rd32=ct1(Rs32) C Intrinsic Prototype: Word32 Q6_R_ct1_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct1_R __builtin_HEXAGON_S2_ct1 /* ========================================================================== Assembly Syntax: Rd32=ct1(Rss32) C Intrinsic Prototype: Word32 Q6_R_ct1_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_ct1_P __builtin_HEXAGON_S2_ct1p /* ========================================================================== Assembly Syntax: Rdd32=deinterleave(Rss32) C Intrinsic Prototype: Word64 Q6_P_deinterleave_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_deinterleave_P __builtin_HEXAGON_S2_deinterleave /* ========================================================================== Assembly Syntax: Rd32=extractu(Rs32,#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_extractu_RII(Word32 Rs, Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extractu_RII __builtin_HEXAGON_S2_extractu /* ========================================================================== Assembly Syntax: Rd32=extractu(Rs32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_extractu_RP(Word32 Rs, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extractu_RP __builtin_HEXAGON_S2_extractu_rp /* ========================================================================== Assembly Syntax: Rdd32=extractu(Rss32,#u6,#U6) C Intrinsic Prototype: Word64 Q6_P_extractu_PII(Word64 Rss, Word32 Iu6, Word32 IU6) Instruction Type: S_2op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_extractu_PII __builtin_HEXAGON_S2_extractup /* ========================================================================== Assembly Syntax: Rdd32=extractu(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_extractu_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extractu_PP __builtin_HEXAGON_S2_extractup_rp /* ========================================================================== Assembly Syntax: Rx32=insert(Rs32,#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_insert_RII(Word32 Rx, Word32 Rs, Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_insert_RII __builtin_HEXAGON_S2_insert /* ========================================================================== Assembly Syntax: Rx32=insert(Rs32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_insert_RP(Word32 Rx, Word32 Rs, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_insert_RP __builtin_HEXAGON_S2_insert_rp /* ========================================================================== Assembly Syntax: Rxx32=insert(Rss32,#u6,#U6) C Intrinsic Prototype: Word64 Q6_P_insert_PII(Word64 Rxx, Word64 Rss, Word32 Iu6, Word32 IU6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_insert_PII __builtin_HEXAGON_S2_insertp /* ========================================================================== Assembly Syntax: Rxx32=insert(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_insert_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_insert_PP __builtin_HEXAGON_S2_insertp_rp /* ========================================================================== Assembly Syntax: Rdd32=interleave(Rss32) C Intrinsic Prototype: Word64 Q6_P_interleave_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_interleave_P __builtin_HEXAGON_S2_interleave /* ========================================================================== Assembly Syntax: Rdd32=lfs(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_lfs_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lfs_PP __builtin_HEXAGON_S2_lfsp /* ========================================================================== Assembly Syntax: Rdd32=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsl_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsl_PR __builtin_HEXAGON_S2_lsl_r_p /* ========================================================================== Assembly Syntax: Rxx32+=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslacc_PR __builtin_HEXAGON_S2_lsl_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsland_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsland_PR __builtin_HEXAGON_S2_lsl_r_p_and /* 
========================================================================== Assembly Syntax: Rxx32-=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslnac_PR __builtin_HEXAGON_S2_lsl_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslor_PR __builtin_HEXAGON_S2_lsl_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=lsl(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lslxacc_PR __builtin_HEXAGON_S2_lsl_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsl_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsl_RR __builtin_HEXAGON_S2_lsl_r_r /* ========================================================================== Assembly Syntax: Rx32+=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lslacc_RR __builtin_HEXAGON_S2_lsl_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=lsl(Rs32,Rt32) C Intrinsic 
Prototype: Word32 Q6_R_lsland_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsland_RR __builtin_HEXAGON_S2_lsl_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lslnac_RR __builtin_HEXAGON_S2_lsl_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=lsl(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lslor_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lslor_RR __builtin_HEXAGON_S2_lsl_r_r_or /* ========================================================================== Assembly Syntax: Rdd32=vlslh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlslh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlslh_PR __builtin_HEXAGON_S2_lsl_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vlslw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlslw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlslw_PR __builtin_HEXAGON_S2_lsl_r_vw /* ========================================================================== Assembly Syntax: Rdd32=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsr_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_lsr_PI __builtin_HEXAGON_S2_lsr_i_p /* ========================================================================== Assembly Syntax: Rxx32+=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsracc_PI __builtin_HEXAGON_S2_lsr_i_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrand_PI __builtin_HEXAGON_S2_lsr_i_p_and /* ========================================================================== Assembly Syntax: Rxx32-=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrnac_PI __builtin_HEXAGON_S2_lsr_i_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsror_PI __builtin_HEXAGON_S2_lsr_i_p_or /* ========================================================================== Assembly Syntax: Rxx32^=lsr(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrxacc_PI 
__builtin_HEXAGON_S2_lsr_i_p_xacc /* ========================================================================== Assembly Syntax: Rd32=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsr_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsr_RI __builtin_HEXAGON_S2_lsr_i_r /* ========================================================================== Assembly Syntax: Rx32+=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsracc_RI __builtin_HEXAGON_S2_lsr_i_r_acc /* ========================================================================== Assembly Syntax: Rx32&=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrand_RI __builtin_HEXAGON_S2_lsr_i_r_and /* ========================================================================== Assembly Syntax: Rx32-=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrnac_RI __builtin_HEXAGON_S2_lsr_i_r_nac /* ========================================================================== Assembly Syntax: Rx32|=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsror_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsror_RI __builtin_HEXAGON_S2_lsr_i_r_or /* ========================================================================== Assembly Syntax: 
Rx32^=lsr(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_lsrxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrxacc_RI __builtin_HEXAGON_S2_lsr_i_r_xacc /* ========================================================================== Assembly Syntax: Rdd32=vlsrh(Rss32,#u4) C Intrinsic Prototype: Word64 Q6_P_vlsrh_PI(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrh_PI __builtin_HEXAGON_S2_lsr_i_vh /* ========================================================================== Assembly Syntax: Rdd32=vlsrw(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vlsrw_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrw_PI __builtin_HEXAGON_S2_lsr_i_vw /* ========================================================================== Assembly Syntax: Rdd32=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsr_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsr_PR __builtin_HEXAGON_S2_lsr_r_p /* ========================================================================== Assembly Syntax: Rxx32+=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsracc_PR __builtin_HEXAGON_S2_lsr_r_p_acc /* ========================================================================== Assembly Syntax: Rxx32&=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_lsrand_PR __builtin_HEXAGON_S2_lsr_r_p_and /* ========================================================================== Assembly Syntax: Rxx32-=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrnac_PR __builtin_HEXAGON_S2_lsr_r_p_nac /* ========================================================================== Assembly Syntax: Rxx32|=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsror_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsror_PR __builtin_HEXAGON_S2_lsr_r_p_or /* ========================================================================== Assembly Syntax: Rxx32^=lsr(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_lsrxacc_PR __builtin_HEXAGON_S2_lsr_r_p_xor /* ========================================================================== Assembly Syntax: Rd32=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsr_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsr_RR __builtin_HEXAGON_S2_lsr_r_r /* ========================================================================== Assembly Syntax: Rx32+=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsracc_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsracc_RR 
__builtin_HEXAGON_S2_lsr_r_r_acc /* ========================================================================== Assembly Syntax: Rx32&=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsrand_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrand_RR __builtin_HEXAGON_S2_lsr_r_r_and /* ========================================================================== Assembly Syntax: Rx32-=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsrnac_RR __builtin_HEXAGON_S2_lsr_r_r_nac /* ========================================================================== Assembly Syntax: Rx32|=lsr(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsror_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsror_RR __builtin_HEXAGON_S2_lsr_r_r_or /* ========================================================================== Assembly Syntax: Rdd32=vlsrh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlsrh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrh_PR __builtin_HEXAGON_S2_lsr_r_vh /* ========================================================================== Assembly Syntax: Rdd32=vlsrw(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vlsrw_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vlsrw_PR __builtin_HEXAGON_S2_lsr_r_vw /* ========================================================================== Assembly Syntax: 
Rdd32=packhl(Rs32,Rt32) C Intrinsic Prototype: Word64 Q6_P_packhl_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU32_3op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_packhl_RR __builtin_HEXAGON_S2_packhl /* ========================================================================== Assembly Syntax: Rd32=parity(Rss32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_parity_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_parity_PP __builtin_HEXAGON_S2_parityp /* ========================================================================== Assembly Syntax: Rd32=setbit(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_setbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_setbit_RI __builtin_HEXAGON_S2_setbit_i /* ========================================================================== Assembly Syntax: Rd32=setbit(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_setbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_setbit_RR __builtin_HEXAGON_S2_setbit_r /* ========================================================================== Assembly Syntax: Rdd32=shuffeb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_shuffeb_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffeb_PP __builtin_HEXAGON_S2_shuffeb /* ========================================================================== Assembly Syntax: Rdd32=shuffeh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_shuffeh_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_shuffeh_PP __builtin_HEXAGON_S2_shuffeh /* ========================================================================== Assembly Syntax: Rdd32=shuffob(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_shuffob_PP(Word64 Rtt, Word64 Rss) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffob_PP __builtin_HEXAGON_S2_shuffob /* ========================================================================== Assembly Syntax: Rdd32=shuffoh(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_shuffoh_PP(Word64 Rtt, Word64 Rss) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_shuffoh_PP __builtin_HEXAGON_S2_shuffoh /* ========================================================================== Assembly Syntax: memb(Rx32++#s4:0:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memb_IMR_circ(void** Rx, Word32 Is4_0, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memb_IMR_circ __builtin_HEXAGON_S2_storerb_pci /* ========================================================================== Assembly Syntax: memb(Rx32++I:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memb_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memb_MR_circ __builtin_HEXAGON_S2_storerb_pcr /* ========================================================================== Assembly Syntax: memd(Rx32++#s4:3:circ(Mu2))=Rtt32 C Intrinsic Prototype: void Q6_memd_IMP_circ(void** Rx, Word32 Is4_3, Word32 Mu, Word64 Rtt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 
========================================================================== */ #define Q6_memd_IMP_circ __builtin_HEXAGON_S2_storerd_pci /* ========================================================================== Assembly Syntax: memd(Rx32++I:circ(Mu2))=Rtt32 C Intrinsic Prototype: void Q6_memd_MP_circ(void** Rx, Word32 Mu, Word64 Rtt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memd_MP_circ __builtin_HEXAGON_S2_storerd_pcr /* ========================================================================== Assembly Syntax: memh(Rx32++#s4:1:circ(Mu2))=Rt32.h C Intrinsic Prototype: void Q6_memh_IMRh_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_IMRh_circ __builtin_HEXAGON_S2_storerf_pci /* ========================================================================== Assembly Syntax: memh(Rx32++I:circ(Mu2))=Rt32.h C Intrinsic Prototype: void Q6_memh_MRh_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_MRh_circ __builtin_HEXAGON_S2_storerf_pcr /* ========================================================================== Assembly Syntax: memh(Rx32++#s4:1:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memh_IMR_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_IMR_circ __builtin_HEXAGON_S2_storerh_pci /* ========================================================================== Assembly Syntax: memh(Rx32++I:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memh_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* 
BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memh_MR_circ __builtin_HEXAGON_S2_storerh_pcr /* ========================================================================== Assembly Syntax: memw(Rx32++#s4:2:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memw_IMR_circ(void** Rx, Word32 Is4_2, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memw_IMR_circ __builtin_HEXAGON_S2_storeri_pci /* ========================================================================== Assembly Syntax: memw(Rx32++I:circ(Mu2))=Rt32 C Intrinsic Prototype: void Q6_memw_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) Instruction Type: ST Execution Slots: SLOT01 ========================================================================== */ #define Q6_memw_MR_circ __builtin_HEXAGON_S2_storeri_pcr /* ========================================================================== Assembly Syntax: Rd32=vsathb(Rs32) C Intrinsic Prototype: Word32 Q6_R_vsathb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathb_R __builtin_HEXAGON_S2_svsathb /* ========================================================================== Assembly Syntax: Rd32=vsathub(Rs32) C Intrinsic Prototype: Word32 Q6_R_vsathub_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathub_R __builtin_HEXAGON_S2_svsathub /* ========================================================================== Assembly Syntax: Rx32=tableidxb(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxb_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 
========================================================================== */ #define Q6_R_tableidxb_RII __builtin_HEXAGON_S2_tableidxb_goodsyntax /* ========================================================================== Assembly Syntax: Rx32=tableidxd(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxd_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxd_RII __builtin_HEXAGON_S2_tableidxd_goodsyntax /* ========================================================================== Assembly Syntax: Rx32=tableidxh(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxh_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxh_RII __builtin_HEXAGON_S2_tableidxh_goodsyntax /* ========================================================================== Assembly Syntax: Rx32=tableidxw(Rs32,#u4,#U5) C Intrinsic Prototype: Word32 Q6_R_tableidxw_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_tableidxw_RII __builtin_HEXAGON_S2_tableidxw_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=togglebit(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_togglebit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_togglebit_RI __builtin_HEXAGON_S2_togglebit_i /* ========================================================================== Assembly Syntax: Rd32=togglebit(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_togglebit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution 
Slots: SLOT23 ========================================================================== */ #define Q6_R_togglebit_RR __builtin_HEXAGON_S2_togglebit_r /* ========================================================================== Assembly Syntax: Pd4=tstbit(Rs32,#u5) C Intrinsic Prototype: Byte Q6_p_tstbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_tstbit_RI __builtin_HEXAGON_S2_tstbit_i /* ========================================================================== Assembly Syntax: Pd4=tstbit(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_tstbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_tstbit_RR __builtin_HEXAGON_S2_tstbit_r /* ========================================================================== Assembly Syntax: Rdd32=valignb(Rtt32,Rss32,#u3) C Intrinsic Prototype: Word64 Q6_P_valignb_PPI(Word64 Rtt, Word64 Rss, Word32 Iu3) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_valignb_PPI __builtin_HEXAGON_S2_valignib /* ========================================================================== Assembly Syntax: Rdd32=valignb(Rtt32,Rss32,Pu4) C Intrinsic Prototype: Word64 Q6_P_valignb_PPp(Word64 Rtt, Word64 Rss, Byte Pu) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_valignb_PPp __builtin_HEXAGON_S2_valignrb /* ========================================================================== Assembly Syntax: Rdd32=vcnegh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vcnegh_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcnegh_PR 
__builtin_HEXAGON_S2_vcnegh /* ========================================================================== Assembly Syntax: Rdd32=vcrotate(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vcrotate_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vcrotate_PR __builtin_HEXAGON_S2_vcrotate /* ========================================================================== Assembly Syntax: Rxx32+=vrcnegh(Rss32,Rt32) C Intrinsic Prototype: Word64 Q6_P_vrcneghacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcneghacc_PR __builtin_HEXAGON_S2_vrcnegh /* ========================================================================== Assembly Syntax: Rd32=vrndwh(Rss32) C Intrinsic Prototype: Word32 Q6_R_vrndwh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vrndwh_P __builtin_HEXAGON_S2_vrndpackwh /* ========================================================================== Assembly Syntax: Rd32=vrndwh(Rss32):sat C Intrinsic Prototype: Word32 Q6_R_vrndwh_P_sat(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vrndwh_P_sat __builtin_HEXAGON_S2_vrndpackwhs /* ========================================================================== Assembly Syntax: Rd32=vsathb(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsathb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathb_P __builtin_HEXAGON_S2_vsathb /* ========================================================================== Assembly Syntax: Rdd32=vsathb(Rss32) C Intrinsic Prototype: Word64 
Q6_P_vsathb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsathb_P __builtin_HEXAGON_S2_vsathb_nopack /* ========================================================================== Assembly Syntax: Rd32=vsathub(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsathub_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsathub_P __builtin_HEXAGON_S2_vsathub /* ========================================================================== Assembly Syntax: Rdd32=vsathub(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsathub_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsathub_P __builtin_HEXAGON_S2_vsathub_nopack /* ========================================================================== Assembly Syntax: Rd32=vsatwh(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsatwh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsatwh_P __builtin_HEXAGON_S2_vsatwh /* ========================================================================== Assembly Syntax: Rdd32=vsatwh(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsatwh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsatwh_P __builtin_HEXAGON_S2_vsatwh_nopack /* ========================================================================== Assembly Syntax: Rd32=vsatwuh(Rss32) C Intrinsic Prototype: Word32 Q6_R_vsatwuh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh /* 
========================================================================== Assembly Syntax: Rdd32=vsatwuh(Rss32) C Intrinsic Prototype: Word64 Q6_P_vsatwuh_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh_nopack /* ========================================================================== Assembly Syntax: Rd32=vsplatb(Rs32) C Intrinsic Prototype: Word32 Q6_R_vsplatb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vsplatb_R __builtin_HEXAGON_S2_vsplatrb /* ========================================================================== Assembly Syntax: Rdd32=vsplath(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsplath_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsplath_R __builtin_HEXAGON_S2_vsplatrh /* ========================================================================== Assembly Syntax: Rdd32=vspliceb(Rss32,Rtt32,#u3) C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPI(Word64 Rss, Word64 Rtt, Word32 Iu3) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vspliceb_PPI __builtin_HEXAGON_S2_vspliceib /* ========================================================================== Assembly Syntax: Rdd32=vspliceb(Rss32,Rtt32,Pu4) C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPp(Word64 Rss, Word64 Rtt, Byte Pu) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vspliceb_PPp __builtin_HEXAGON_S2_vsplicerb /* ========================================================================== Assembly Syntax: Rdd32=vsxtbh(Rs32) C Intrinsic Prototype: Word64 
Q6_P_vsxtbh_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsxtbh_R __builtin_HEXAGON_S2_vsxtbh /* ========================================================================== Assembly Syntax: Rdd32=vsxthw(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsxthw_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsxthw_R __builtin_HEXAGON_S2_vsxthw /* ========================================================================== Assembly Syntax: Rd32=vtrunehb(Rss32) C Intrinsic Prototype: Word32 Q6_R_vtrunehb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vtrunehb_P __builtin_HEXAGON_S2_vtrunehb /* ========================================================================== Assembly Syntax: Rdd32=vtrunewh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunewh_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunewh_PP __builtin_HEXAGON_S2_vtrunewh /* ========================================================================== Assembly Syntax: Rd32=vtrunohb(Rss32) C Intrinsic Prototype: Word32 Q6_R_vtrunohb_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vtrunohb_P __builtin_HEXAGON_S2_vtrunohb /* ========================================================================== Assembly Syntax: Rdd32=vtrunowh(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunowh_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunowh_PP 
__builtin_HEXAGON_S2_vtrunowh /* ========================================================================== Assembly Syntax: Rdd32=vzxtbh(Rs32) C Intrinsic Prototype: Word64 Q6_P_vzxtbh_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vzxtbh_R __builtin_HEXAGON_S2_vzxtbh /* ========================================================================== Assembly Syntax: Rdd32=vzxthw(Rs32) C Intrinsic Prototype: Word64 Q6_P_vzxthw_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vzxthw_R __builtin_HEXAGON_S2_vzxthw /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,add(Ru32,#s6)) C Intrinsic Prototype: Word32 Q6_R_add_add_RRI(Word32 Rs, Word32 Ru, Word32 Is6) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_add_RRI __builtin_HEXAGON_S4_addaddi /* ========================================================================== Assembly Syntax: Rx32=add(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_add_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_asl_IRI __builtin_HEXAGON_S4_addi_asl_ri /* ========================================================================== Assembly Syntax: Rx32=add(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_add_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_lsr_IRI __builtin_HEXAGON_S4_addi_lsr_ri /* ========================================================================== Assembly Syntax: 
Rx32=and(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_and_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_and_asl_IRI __builtin_HEXAGON_S4_andi_asl_ri /* ========================================================================== Assembly Syntax: Rx32=and(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_and_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_and_lsr_IRI __builtin_HEXAGON_S4_andi_lsr_ri /* ========================================================================== Assembly Syntax: Rd32=add(clb(Rs32),#s6) C Intrinsic Prototype: Word32 Q6_R_add_clb_RI(Word32 Rs, Word32 Is6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_clb_RI __builtin_HEXAGON_S4_clbaddi /* ========================================================================== Assembly Syntax: Rd32=add(clb(Rss32),#s6) C Intrinsic Prototype: Word32 Q6_R_add_clb_PI(Word64 Rss, Word32 Is6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_clb_PI __builtin_HEXAGON_S4_clbpaddi /* ========================================================================== Assembly Syntax: Rd32=normamt(Rss32) C Intrinsic Prototype: Word32 Q6_R_normamt_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_normamt_P __builtin_HEXAGON_S4_clbpnorm /* ========================================================================== Assembly Syntax: Rd32=extract(Rs32,#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_extract_RII(Word32 Rs, Word32 Iu5, Word32 IU5) Instruction Type: S_2op 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extract_RII __builtin_HEXAGON_S4_extract /* ========================================================================== Assembly Syntax: Rd32=extract(Rs32,Rtt32) C Intrinsic Prototype: Word32 Q6_R_extract_RP(Word32 Rs, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_extract_RP __builtin_HEXAGON_S4_extract_rp /* ========================================================================== Assembly Syntax: Rdd32=extract(Rss32,#u6,#U6) C Intrinsic Prototype: Word64 Q6_P_extract_PII(Word64 Rss, Word32 Iu6, Word32 IU6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extract_PII __builtin_HEXAGON_S4_extractp /* ========================================================================== Assembly Syntax: Rdd32=extract(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_extract_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_extract_PP __builtin_HEXAGON_S4_extractp_rp /* ========================================================================== Assembly Syntax: Rd32=lsl(#s6,Rt32) C Intrinsic Prototype: Word32 Q6_R_lsl_IR(Word32 Is6, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_lsl_IR __builtin_HEXAGON_S4_lsli /* ========================================================================== Assembly Syntax: Pd4=!tstbit(Rs32,#u5) C Intrinsic Prototype: Byte Q6_p_not_tstbit_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_tstbit_RI 
__builtin_HEXAGON_S4_ntstbit_i /* ========================================================================== Assembly Syntax: Pd4=!tstbit(Rs32,Rt32) C Intrinsic Prototype: Byte Q6_p_not_tstbit_RR(Word32 Rs, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_tstbit_RR __builtin_HEXAGON_S4_ntstbit_r /* ========================================================================== Assembly Syntax: Rx32|=and(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_andor_RI(Word32 Rx, Word32 Rs, Word32 Is10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_andor_RI __builtin_HEXAGON_S4_or_andi /* ========================================================================== Assembly Syntax: Rx32=or(Ru32,and(Rx32,#s10)) C Intrinsic Prototype: Word32 Q6_R_or_and_RRI(Word32 Ru, Word32 Rx, Word32 Is10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_or_and_RRI __builtin_HEXAGON_S4_or_andix /* ========================================================================== Assembly Syntax: Rx32|=or(Rs32,#s10) C Intrinsic Prototype: Word32 Q6_R_oror_RI(Word32 Rx, Word32 Rs, Word32 Is10) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_oror_RI __builtin_HEXAGON_S4_or_ori /* ========================================================================== Assembly Syntax: Rx32=or(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_or_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_or_asl_IRI __builtin_HEXAGON_S4_ori_asl_ri /* ========================================================================== Assembly 
Syntax: Rx32=or(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_or_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_or_lsr_IRI __builtin_HEXAGON_S4_ori_lsr_ri /* ========================================================================== Assembly Syntax: Rd32=parity(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_parity_RR(Word32 Rs, Word32 Rt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_parity_RR __builtin_HEXAGON_S4_parity /* ========================================================================== Assembly Syntax: Rd32=add(Rs32,sub(#s6,Ru32)) C Intrinsic Prototype: Word32 Q6_R_add_sub_RIR(Word32 Rs, Word32 Is6, Word32 Ru) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_add_sub_RIR __builtin_HEXAGON_S4_subaddi /* ========================================================================== Assembly Syntax: Rx32=sub(#u8,asl(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_sub_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_asl_IRI __builtin_HEXAGON_S4_subi_asl_ri /* ========================================================================== Assembly Syntax: Rx32=sub(#u8,lsr(Rx32,#U5)) C Intrinsic Prototype: Word32 Q6_R_sub_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_sub_lsr_IRI __builtin_HEXAGON_S4_subi_lsr_ri /* ========================================================================== Assembly Syntax: Rdd32=vrcrotate(Rss32,Rt32,#u2) C Intrinsic Prototype: Word64 
Q6_P_vrcrotate_PRI(Word64 Rss, Word32 Rt, Word32 Iu2) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcrotate_PRI __builtin_HEXAGON_S4_vrcrotate /* ========================================================================== Assembly Syntax: Rxx32+=vrcrotate(Rss32,Rt32,#u2) C Intrinsic Prototype: Word64 Q6_P_vrcrotateacc_PRI(Word64 Rxx, Word64 Rss, Word32 Rt, Word32 Iu2) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vrcrotateacc_PRI __builtin_HEXAGON_S4_vrcrotate_acc /* ========================================================================== Assembly Syntax: Rdd32=vxaddsubh(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxaddsubh_PP_sat __builtin_HEXAGON_S4_vxaddsubh /* ========================================================================== Assembly Syntax: Rdd32=vxaddsubh(Rss32,Rtt32):rnd:>>1:sat C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxaddsubh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxaddsubhr /* ========================================================================== Assembly Syntax: Rdd32=vxaddsubw(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxaddsubw_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxaddsubw_PP_sat __builtin_HEXAGON_S4_vxaddsubw /* ========================================================================== Assembly Syntax: Rdd32=vxsubaddh(Rss32,Rtt32):sat C Intrinsic 
Prototype: Word64 Q6_P_vxsubaddh_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxsubaddh_PP_sat __builtin_HEXAGON_S4_vxsubaddh /* ========================================================================== Assembly Syntax: Rdd32=vxsubaddh(Rss32,Rtt32):rnd:>>1:sat C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxsubaddh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxsubaddhr /* ========================================================================== Assembly Syntax: Rdd32=vxsubaddw(Rss32,Rtt32):sat C Intrinsic Prototype: Word64 Q6_P_vxsubaddw_PP_sat(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vxsubaddw_PP_sat __builtin_HEXAGON_S4_vxsubaddw /* ========================================================================== Assembly Syntax: Rd32=vasrhub(Rss32,#u4):rnd:sat C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_rnd_sat(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_R_vasrhub_PI_rnd_sat __builtin_HEXAGON_S5_asrhub_rnd_sat_goodsyntax /* ========================================================================== Assembly Syntax: Rd32=vasrhub(Rss32,#u4):sat C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_sat(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_vasrhub_PI_sat __builtin_HEXAGON_S5_asrhub_sat /* ========================================================================== Assembly Syntax: Rd32=popcount(Rss32) C Intrinsic Prototype: Word32 
Q6_R_popcount_P(Word64 Rss) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_popcount_P __builtin_HEXAGON_S5_popcountp /* ========================================================================== Assembly Syntax: Rdd32=vasrh(Rss32,#u4):rnd C Intrinsic Prototype: Word64 Q6_P_vasrh_PI_rnd(Word64 Rss, Word32 Iu4) Instruction Type: S_2op Execution Slots: SLOT0123 ========================================================================== */ #define Q6_P_vasrh_PI_rnd __builtin_HEXAGON_S5_vasrhrnd_goodsyntax /* ========================================================================== Assembly Syntax: dccleana(Rs32) C Intrinsic Prototype: void Q6_dccleana_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dccleana_A __builtin_HEXAGON_Y2_dccleana /* ========================================================================== Assembly Syntax: dccleaninva(Rs32) C Intrinsic Prototype: void Q6_dccleaninva_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dccleaninva_A __builtin_HEXAGON_Y2_dccleaninva /* ========================================================================== Assembly Syntax: dcfetch(Rs32) C Intrinsic Prototype: void Q6_dcfetch_A(Address Rs) Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_dcfetch_A __builtin_HEXAGON_Y2_dcfetch /* ========================================================================== Assembly Syntax: dcinva(Rs32) C Intrinsic Prototype: void Q6_dcinva_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dcinva_A __builtin_HEXAGON_Y2_dcinva /* 
========================================================================== Assembly Syntax: dczeroa(Rs32) C Intrinsic Prototype: void Q6_dczeroa_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dczeroa_A __builtin_HEXAGON_Y2_dczeroa /* ========================================================================== Assembly Syntax: l2fetch(Rs32,Rt32) C Intrinsic Prototype: void Q6_l2fetch_AR(Address Rs, Word32 Rt) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_l2fetch_AR __builtin_HEXAGON_Y4_l2fetch /* ========================================================================== Assembly Syntax: l2fetch(Rs32,Rtt32) C Intrinsic Prototype: void Q6_l2fetch_AP(Address Rs, Word64 Rtt) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_l2fetch_AP __builtin_HEXAGON_Y5_l2fetch #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rdd32=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rol_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rol_PI __builtin_HEXAGON_S6_rol_i_p #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32+=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolacc_PI __builtin_HEXAGON_S6_rol_i_p_acc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Rxx32&=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_roland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_roland_PI __builtin_HEXAGON_S6_rol_i_p_and #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32-=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolnac_PI __builtin_HEXAGON_S6_rol_i_p_nac #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32|=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolor_PI __builtin_HEXAGON_S6_rol_i_p_or #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rxx32^=rol(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_rolxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_rolxacc_PI __builtin_HEXAGON_S6_rol_i_p_xacc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rd32=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rol_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 
========================================================================== */ #define Q6_R_rol_RI __builtin_HEXAGON_S6_rol_i_r #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32+=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolacc_RI __builtin_HEXAGON_S6_rol_i_r_acc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32&=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_roland_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_roland_RI __builtin_HEXAGON_S6_rol_i_r_and #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32-=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolnac_RI __builtin_HEXAGON_S6_rol_i_r_nac #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rx32|=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolor_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolor_RI __builtin_HEXAGON_S6_rol_i_r_or #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Rx32^=rol(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_rolxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_rolxacc_RI __builtin_HEXAGON_S6_rol_i_r_xacc #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffb(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffb_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffb_PP __builtin_HEXAGON_M6_vabsdiffb #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vabsdiffub(Rtt32,Rss32) C Intrinsic Prototype: Word64 Q6_P_vabsdiffub_PP(Word64 Rtt, Word64 Rss) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vabsdiffub_PP __builtin_HEXAGON_M6_vabsdiffub #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vsplatb(Rs32) C Intrinsic Prototype: Word64 Q6_P_vsplatb_R(Word32 Rs) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vsplatb_R __builtin_HEXAGON_S6_vsplatrbp #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vtrunehb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunehb_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 
========================================================================== */ #define Q6_P_vtrunehb_PP __builtin_HEXAGON_S6_vtrunehb_ppp #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Rdd32=vtrunohb(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vtrunohb_PP(Word64 Rss, Word64 Rtt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vtrunohb_PP __builtin_HEXAGON_S6_vtrunohb_ppp #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HEXAGON_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Pd4=!any8(vcmpb.eq(Rss32,Rtt32)) C Intrinsic Prototype: Byte Q6_p_not_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) Instruction Type: ALU64 Execution Slots: SLOT23 ========================================================================== */ #define Q6_p_not_any8_vcmpb_eq_PP __builtin_HEXAGON_A6_vcmpbeq_notany #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HEXAGON_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Rdd32=dfadd(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfadd_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfadd_PP __builtin_HEXAGON_F2_dfadd #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Rdd32=dfsub(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfsub_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfsub_PP __builtin_HEXAGON_F2_dfsub #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 66 /* 
========================================================================== Assembly Syntax: Rx32-=mpyi(Rs32,Rt32) C Intrinsic Prototype: Word32 Q6_R_mpyinac_RR(Word32 Rx, Word32 Rs, Word32 Rt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mpyinac_RR __builtin_HEXAGON_M2_mnaci #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Rd32=mask(#u5,#U5) C Intrinsic Prototype: Word32 Q6_R_mask_II(Word32 Iu5, Word32 IU5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_mask_II __builtin_HEXAGON_S2_mask #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=clip(Rs32,#u5) C Intrinsic Prototype: Word32 Q6_R_clip_RI(Word32 Rs, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_R_clip_RI __builtin_HEXAGON_A7_clip #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cround(Rss32,#u6) C Intrinsic Prototype: Word64 Q6_P_cround_PI(Word64 Rss, Word32 Iu6) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cround_PI __builtin_HEXAGON_A7_croundd_ri #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cround(Rss32,Rt32) C Intrinsic Prototype: Word64 
Q6_P_cround_PR(Word64 Rss, Word32 Rt) Instruction Type: S_3op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_cround_PR __builtin_HEXAGON_A7_croundd_rr #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=vclip(Rss32,#u5) C Intrinsic Prototype: Word64 Q6_P_vclip_PI(Word64 Rss, Word32 Iu5) Instruction Type: S_2op Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_vclip_PI __builtin_HEXAGON_A7_vclip #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmax(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmax_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmax_PP __builtin_HEXAGON_F2_dfmax #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmin(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmin_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmin_PP __builtin_HEXAGON_F2_dfmin #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmpyfix(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpyfix_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpyfix_PP 
__builtin_HEXAGON_F2_dfmpyfix #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rxx32+=dfmpyhh(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpyhhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpyhhacc_PP __builtin_HEXAGON_F2_dfmpyhh #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rxx32+=dfmpylh(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpylhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpylhacc_PP __builtin_HEXAGON_F2_dfmpylh #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 /* ========================================================================== Assembly Syntax: Rdd32=dfmpyll(Rss32,Rtt32) C Intrinsic Prototype: Float64 Q6_P_dfmpyll_PP(Float64 Rss, Float64 Rtt) Instruction Type: M Execution Slots: SLOT23 ========================================================================== */ #define Q6_P_dfmpyll_PP __builtin_HEXAGON_F2_dfmpyll #endif /* __HEXAGON_ARCH___ >= 67 */ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyiw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiw_PP __builtin_HEXAGON_M7_dcmpyiw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* 
========================================================================== Assembly Syntax: Rxx32+=cmpyiw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiwacc_PP __builtin_HEXAGON_M7_dcmpyiw_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyiw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiw_PP_conj __builtin_HEXAGON_M7_dcmpyiwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyiw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyiwacc_PP_conj __builtin_HEXAGON_M7_dcmpyiwc_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyrw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrw_PP __builtin_HEXAGON_M7_dcmpyrw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined 
__HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyrw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrwacc_PP __builtin_HEXAGON_M7_dcmpyrw_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=cmpyrw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP_conj(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrw_PP_conj __builtin_HEXAGON_M7_dcmpyrwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=cmpyrw(Rss32,Rtt32*) C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_cmpyrwacc_PP_conj __builtin_HEXAGON_M7_dcmpyrwc_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rdd32=vdmpyw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vdmpyw_PP(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_vdmpyw_PP __builtin_HEXAGON_M7_vdmpy #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && 
defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rxx32+=vdmpyw(Rss32,Rtt32) C Intrinsic Prototype: Word64 Q6_P_vdmpywacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_P_vdmpywacc_PP __builtin_HEXAGON_M7_vdmpy_acc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyiw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiw_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32*):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyiwc #endif /* __HEXAGON_ARCH___ >= 67 && defined 
__HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyiw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiwc_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyrw #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrw_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32*):<<1:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define 
Q6_R_cmpyrw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyrwc #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ /* ========================================================================== Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32*):<<1:rnd:sat C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt) Instruction Type: M Execution Slots: SLOT3 ========================================================================== */ #define Q6_R_cmpyrw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrwc_rnd #endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: dmlink(Rs32,Rt32) C Intrinsic Prototype: void Q6_dmlink_AA(Address Rs, Address Rt) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dmlink_AA __builtin_HEXAGON_Y6_dmlink #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Rd32=dmpause C Intrinsic Prototype: Word32 Q6_R_dmpause() Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_dmpause __builtin_HEXAGON_Y6_dmpause #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Rd32=dmpoll C Intrinsic Prototype: Word32 Q6_R_dmpoll() Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_dmpoll __builtin_HEXAGON_Y6_dmpoll #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: dmresume(Rs32) C Intrinsic 
Prototype: void Q6_dmresume_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dmresume_A __builtin_HEXAGON_Y6_dmresume #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: dmstart(Rs32) C Intrinsic Prototype: void Q6_dmstart_A(Address Rs) Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_dmstart_A __builtin_HEXAGON_Y6_dmstart #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HEXAGON_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Rd32=dmwait C Intrinsic Prototype: Word32 Q6_R_dmwait() Instruction Type: ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_dmwait __builtin_HEXAGON_Y6_dmwait #endif /* __HEXAGON_ARCH___ >= 68 */ #include #ifdef __HVX__ #include #endif /* __HVX__ */ #endif /******************************************************************************/ /* (c) 2020 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ /* */ /******************************************************************************/ #ifndef HEXAGON_TYPES_H #define HEXAGON_TYPES_H #include /* Hexagon names */ #define HEXAGON_Vect HEXAGON_Vect64 #define HEXAGON_V_GET_D HEXAGON_V64_GET_D #define HEXAGON_V_GET_UD HEXAGON_V64_GET_UD #define HEXAGON_V_GET_W0 HEXAGON_V64_GET_W0 #define HEXAGON_V_GET_W1 HEXAGON_V64_GET_W1 #define HEXAGON_V_GET_UW0 HEXAGON_V64_GET_UW0 #define HEXAGON_V_GET_UW1 HEXAGON_V64_GET_UW1 #define HEXAGON_V_GET_H0 HEXAGON_V64_GET_H0 #define HEXAGON_V_GET_H1 HEXAGON_V64_GET_H1 #define HEXAGON_V_GET_H2 HEXAGON_V64_GET_H2 #define HEXAGON_V_GET_H3 HEXAGON_V64_GET_H3 #define HEXAGON_V_GET_UH0 HEXAGON_V64_GET_UH0 #define HEXAGON_V_GET_UH1 HEXAGON_V64_GET_UH1 #define HEXAGON_V_GET_UH2 HEXAGON_V64_GET_UH2 #define HEXAGON_V_GET_UH3 HEXAGON_V64_GET_UH3 #define HEXAGON_V_GET_B0 HEXAGON_V64_GET_B0 #define HEXAGON_V_GET_B1 HEXAGON_V64_GET_B1 #define HEXAGON_V_GET_B2 HEXAGON_V64_GET_B2 #define HEXAGON_V_GET_B3 HEXAGON_V64_GET_B3 #define HEXAGON_V_GET_B4 HEXAGON_V64_GET_B4 #define HEXAGON_V_GET_B5 HEXAGON_V64_GET_B5 #define HEXAGON_V_GET_B6 HEXAGON_V64_GET_B6 #define HEXAGON_V_GET_B7 HEXAGON_V64_GET_B7 #define HEXAGON_V_GET_UB0 HEXAGON_V64_GET_UB0 #define HEXAGON_V_GET_UB1 HEXAGON_V64_GET_UB1 #define HEXAGON_V_GET_UB2 HEXAGON_V64_GET_UB2 #define HEXAGON_V_GET_UB3 HEXAGON_V64_GET_UB3 #define HEXAGON_V_GET_UB4 HEXAGON_V64_GET_UB4 #define HEXAGON_V_GET_UB5 HEXAGON_V64_GET_UB5 #define HEXAGON_V_GET_UB6 HEXAGON_V64_GET_UB6 #define HEXAGON_V_GET_UB7 HEXAGON_V64_GET_UB7 #define HEXAGON_V_PUT_D HEXAGON_V64_PUT_D #define HEXAGON_V_PUT_W0 HEXAGON_V64_PUT_W0 #define HEXAGON_V_PUT_W1 HEXAGON_V64_PUT_W1 #define HEXAGON_V_PUT_H0 HEXAGON_V64_PUT_H0 #define HEXAGON_V_PUT_H1 HEXAGON_V64_PUT_H1 #define HEXAGON_V_PUT_H2 HEXAGON_V64_PUT_H2 #define HEXAGON_V_PUT_H3 HEXAGON_V64_PUT_H3 #define HEXAGON_V_PUT_B0 HEXAGON_V64_PUT_B0 #define HEXAGON_V_PUT_B1 HEXAGON_V64_PUT_B1 #define HEXAGON_V_PUT_B2 HEXAGON_V64_PUT_B2 #define 
HEXAGON_V_PUT_B3 HEXAGON_V64_PUT_B3 #define HEXAGON_V_PUT_B4 HEXAGON_V64_PUT_B4 #define HEXAGON_V_PUT_B5 HEXAGON_V64_PUT_B5 #define HEXAGON_V_PUT_B6 HEXAGON_V64_PUT_B6 #define HEXAGON_V_PUT_B7 HEXAGON_V64_PUT_B7 #define HEXAGON_V_CREATE_D HEXAGON_V64_CREATE_D #define HEXAGON_V_CREATE_W HEXAGON_V64_CREATE_W #define HEXAGON_V_CREATE_H HEXAGON_V64_CREATE_H #define HEXAGON_V_CREATE_B HEXAGON_V64_CREATE_B #ifdef __cplusplus #define HEXAGON_VectC HEXAGON_Vect64C #endif /* __cplusplus */ /* 64 Bit Vectors */ typedef long long __attribute__((__may_alias__)) HEXAGON_Vect64; /* Extract doubleword macros */ #define HEXAGON_V64_GET_D(v) (v) #define HEXAGON_V64_GET_UD(v) ((unsigned long long)(v)) /* Extract word macros */ #define HEXAGON_V64_GET_W0(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[0]; \ }) #define HEXAGON_V64_GET_W1(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[1]; \ }) #define HEXAGON_V64_GET_UW0(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uw[0]; \ }) #define HEXAGON_V64_GET_UW1(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uw[1]; \ }) /* Extract half word macros */ #define HEXAGON_V64_GET_H0(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[0]; \ }) #define HEXAGON_V64_GET_H1(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[1]; \ }) #define HEXAGON_V64_GET_H2(v) \ 
__extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[2]; \ }) #define HEXAGON_V64_GET_H3(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[3]; \ }) #define HEXAGON_V64_GET_UH0(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[0]; \ }) #define HEXAGON_V64_GET_UH1(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[1]; \ }) #define HEXAGON_V64_GET_UH2(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[2]; \ }) #define HEXAGON_V64_GET_UH3(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.uh[3]; \ }) /* Extract byte macros */ #define HEXAGON_V64_GET_B0(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[0]; \ }) #define HEXAGON_V64_GET_B1(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[1]; \ }) #define HEXAGON_V64_GET_B2(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[2]; \ }) #define HEXAGON_V64_GET_B3(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ 
_HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[3]; \ }) #define HEXAGON_V64_GET_B4(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[4]; \ }) #define HEXAGON_V64_GET_B5(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[5]; \ }) #define HEXAGON_V64_GET_B6(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[6]; \ }) #define HEXAGON_V64_GET_B7(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[7]; \ }) #define HEXAGON_V64_GET_UB0(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[0]; \ }) #define HEXAGON_V64_GET_UB1(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[1]; \ }) #define HEXAGON_V64_GET_UB2(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[2]; \ }) #define HEXAGON_V64_GET_UB3(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[3]; \ }) #define HEXAGON_V64_GET_UB4(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[4]; \ }) #define HEXAGON_V64_GET_UB5(v) \ 
__extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[5]; \ }) #define HEXAGON_V64_GET_UB6(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[6]; \ }) #define HEXAGON_V64_GET_UB7(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.ub[7]; \ }) /* NOTE: All set macros return a HEXAGON_Vect64 type */ /* Set doubleword macro */ #define HEXAGON_V64_PUT_D(v, new) (new) /* Set word macros */ #ifdef __hexagon__ #define HEXAGON_V64_PUT_W0(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[0] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_W1(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.w[1] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_PUT_W0(v, new) \ (((v) & 0xffffffff00000000LL) | ((HEXAGON_Vect64)((unsigned int)(new)))) #define HEXAGON_V64_PUT_W1(v, new) \ (((v) & 0x00000000ffffffffLL) | (((HEXAGON_Vect64)(new)) << 32LL)) #endif /* !__hexagon__ */ /* Set half word macros */ #ifdef __hexagon__ #define HEXAGON_V64_PUT_H0(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[0] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_H1(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ 
_HEXAGON_V64_internal_union.h[1] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_H2(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[2] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_H3(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.h[3] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_PUT_H0(v, new) \ (((v) & 0xffffffffffff0000LL) | ((HEXAGON_Vect64)((unsigned short)(new)))) #define HEXAGON_V64_PUT_H1(v, new) \ (((v) & 0xffffffff0000ffffLL) | (((HEXAGON_Vect64)((unsigned short)(new))) << 16LL)) #define HEXAGON_V64_PUT_H2(v, new) \ (((v) & 0xffff0000ffffffffLL) | (((HEXAGON_Vect64)((unsigned short)(new))) << 32LL)) #define HEXAGON_V64_PUT_H3(v, new) \ (((v) & 0x0000ffffffffffffLL) | (((HEXAGON_Vect64)(new)) << 48LL)) #endif /* !__hexagon__ */ /* Set byte macros */ #ifdef __hexagon__ #define HEXAGON_V64_PUT_B0(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[0] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B1(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[1] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B2(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[2] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B3(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ 
_HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[3] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B4(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[4] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B5(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[5] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B6(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[6] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #define HEXAGON_V64_PUT_B7(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.d = (v); \ _HEXAGON_V64_internal_union.b[7] = (new); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_PUT_B0(v, new) \ (((v) & 0xffffffffffffff00LL) | ((HEXAGON_Vect64)((unsigned char)(new)))) #define HEXAGON_V64_PUT_B1(v, new) \ (((v) & 0xffffffffffff00ffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 8LL)) #define HEXAGON_V64_PUT_B2(v, new) \ (((v) & 0xffffffffff00ffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 16LL)) #define HEXAGON_V64_PUT_B3(v, new) \ (((v) & 0xffffffff00ffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 24LL)) #define HEXAGON_V64_PUT_B4(v, new) \ (((v) & 0xffffff00ffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 32LL)) #define HEXAGON_V64_PUT_B5(v, new) \ (((v) & 0xffff00ffffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 40LL)) #define HEXAGON_V64_PUT_B6(v, new) \ (((v) & 0xff00ffffffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 48LL)) #define HEXAGON_V64_PUT_B7(v, new) \ 
(((v) & 0x00ffffffffffffffLL) | (((HEXAGON_Vect64)(new)) << 56LL)) #endif /* !__hexagon__ */ /* NOTE: All create macros return a HEXAGON_Vect64 type */ /* Create from a doubleword */ #define HEXAGON_V64_CREATE_D(d) (d) /* Create from words */ #ifdef __hexagon__ #define HEXAGON_V64_CREATE_W(w1, w0) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.w[0] = (w0); \ _HEXAGON_V64_internal_union.w[1] = (w1); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_CREATE_W(w1, w0) \ ((((HEXAGON_Vect64)(w1)) << 32LL) | ((HEXAGON_Vect64)((w0) & 0xffffffff))) #endif /* !__hexagon__ */ /* Create from half words */ #ifdef __hexagon__ #define HEXAGON_V64_CREATE_H(h3, h2, h1, h0) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.h[0] = (h0); \ _HEXAGON_V64_internal_union.h[1] = (h1); \ _HEXAGON_V64_internal_union.h[2] = (h2); \ _HEXAGON_V64_internal_union.h[3] = (h3); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_CREATE_H(h3, h2, h1, h0) \ ((((HEXAGON_Vect64)(h3)) << 48LL) | (((HEXAGON_Vect64)((h2) & 0xffff)) << 32LL) | \ (((HEXAGON_Vect64)((h1) & 0xffff)) << 16LL) | ((HEXAGON_Vect64)((h0) & 0xffff))) #endif /* !__hexagon__ */ /* Create from bytes */ #ifdef __hexagon__ #define HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _HEXAGON_V64_internal_union; \ _HEXAGON_V64_internal_union.b[0] = (b0); \ _HEXAGON_V64_internal_union.b[1] = (b1); \ _HEXAGON_V64_internal_union.b[2] = (b2); \ _HEXAGON_V64_internal_union.b[3] = (b3); \ _HEXAGON_V64_internal_union.b[4] = (b4); \ _HEXAGON_V64_internal_union.b[5] = (b5); \ _HEXAGON_V64_internal_union.b[6] = (b6); \ _HEXAGON_V64_internal_union.b[7] = (b7); \ _HEXAGON_V64_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ 
((((HEXAGON_Vect64)(b7)) << 56LL) | (((HEXAGON_Vect64)((b6) & 0xff)) << 48LL) | \ (((HEXAGON_Vect64)((b5) & 0xff)) << 40LL) | (((HEXAGON_Vect64)((b4) & 0xff)) << 32LL) | \ (((HEXAGON_Vect64)((b3) & 0xff)) << 24LL) | (((HEXAGON_Vect64)((b2) & 0xff)) << 16LL) | \ (((HEXAGON_Vect64)((b1) & 0xff)) << 8LL) | ((HEXAGON_Vect64)((b0) & 0xff))) #endif /* !__hexagon__ */ #ifdef __cplusplus class HEXAGON_Vect64C { public: // Constructors HEXAGON_Vect64C(long long d = 0) : data(d) {}; HEXAGON_Vect64C(int w1, int w0) : data(HEXAGON_V64_CREATE_W(w1, w0)) {}; HEXAGON_Vect64C(short h3, short h2, short h1, short h0) : data(HEXAGON_V64_CREATE_H(h3, h2, h1, h0)) {}; HEXAGON_Vect64C(signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0) : data(HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0)) {}; HEXAGON_Vect64C(const HEXAGON_Vect64C &v) : data(v.data) {}; HEXAGON_Vect64C &operator=(const HEXAGON_Vect64C &v) { data = v.data; return *this; }; operator long long() { return data; }; // Extract doubleword methods long long D(void) { return HEXAGON_V64_GET_D(data); }; unsigned long long UD(void) { return HEXAGON_V64_GET_UD(data); }; // Extract word methods int W0(void) { return HEXAGON_V64_GET_W0(data); }; int W1(void) { return HEXAGON_V64_GET_W1(data); }; unsigned int UW0(void) { return HEXAGON_V64_GET_UW0(data); }; unsigned int UW1(void) { return HEXAGON_V64_GET_UW1(data); }; // Extract half word methods short H0(void) { return HEXAGON_V64_GET_H0(data); }; short H1(void) { return HEXAGON_V64_GET_H1(data); }; short H2(void) { return HEXAGON_V64_GET_H2(data); }; short H3(void) { return HEXAGON_V64_GET_H3(data); }; unsigned short UH0(void) { return HEXAGON_V64_GET_UH0(data); }; unsigned short UH1(void) { return HEXAGON_V64_GET_UH1(data); }; unsigned short UH2(void) { return HEXAGON_V64_GET_UH2(data); }; unsigned short UH3(void) { return HEXAGON_V64_GET_UH3(data); }; // Extract byte methods signed char B0(void) { 
return HEXAGON_V64_GET_B0(data); }; signed char B1(void) { return HEXAGON_V64_GET_B1(data); }; signed char B2(void) { return HEXAGON_V64_GET_B2(data); }; signed char B3(void) { return HEXAGON_V64_GET_B3(data); }; signed char B4(void) { return HEXAGON_V64_GET_B4(data); }; signed char B5(void) { return HEXAGON_V64_GET_B5(data); }; signed char B6(void) { return HEXAGON_V64_GET_B6(data); }; signed char B7(void) { return HEXAGON_V64_GET_B7(data); }; unsigned char UB0(void) { return HEXAGON_V64_GET_UB0(data); }; unsigned char UB1(void) { return HEXAGON_V64_GET_UB1(data); }; unsigned char UB2(void) { return HEXAGON_V64_GET_UB2(data); }; unsigned char UB3(void) { return HEXAGON_V64_GET_UB3(data); }; unsigned char UB4(void) { return HEXAGON_V64_GET_UB4(data); }; unsigned char UB5(void) { return HEXAGON_V64_GET_UB5(data); }; unsigned char UB6(void) { return HEXAGON_V64_GET_UB6(data); }; unsigned char UB7(void) { return HEXAGON_V64_GET_UB7(data); }; // NOTE: All set methods return a HEXAGON_Vect64C type // Set doubleword method HEXAGON_Vect64C D(long long d) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_D(data, d)); }; // Set word methods HEXAGON_Vect64C W0(int w) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_W0(data, w)); }; HEXAGON_Vect64C W1(int w) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_W1(data, w)); }; // Set half word methods HEXAGON_Vect64C H0(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H0(data, h)); }; HEXAGON_Vect64C H1(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H1(data, h)); }; HEXAGON_Vect64C H2(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H2(data, h)); }; HEXAGON_Vect64C H3(short h) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_H3(data, h)); }; // Set byte methods HEXAGON_Vect64C B0(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B0(data, b)); }; HEXAGON_Vect64C B1(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B1(data, b)); }; HEXAGON_Vect64C B2(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B2(data, b)); }; HEXAGON_Vect64C B3(signed 
char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B3(data, b)); }; HEXAGON_Vect64C B4(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B4(data, b)); }; HEXAGON_Vect64C B5(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B5(data, b)); }; HEXAGON_Vect64C B6(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B6(data, b)); }; HEXAGON_Vect64C B7(signed char b) { return HEXAGON_Vect64C(HEXAGON_V64_PUT_B7(data, b)); }; private: long long data; }; #endif /* __cplusplus */ /* 32 Bit Vectors */ typedef int HEXAGON_Vect32; /* Extract word macros */ #define HEXAGON_V32_GET_W(v) (v) #define HEXAGON_V32_GET_UW(v) ((unsigned int)(v)) /* Extract half word macros */ #define HEXAGON_V32_GET_H0(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[0]; \ }) #define HEXAGON_V32_GET_H1(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[1]; \ }) #define HEXAGON_V32_GET_UH0(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.uh[0]; \ }) #define HEXAGON_V32_GET_UH1(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.uh[1]; \ }) /* Extract byte macros */ #define HEXAGON_V32_GET_B0(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[0]; \ }) #define HEXAGON_V32_GET_B1(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[1]; \ }) #define HEXAGON_V32_GET_B2(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } 
_HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[2]; \ }) #define HEXAGON_V32_GET_B3(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[3]; \ }) #define HEXAGON_V32_GET_UB0(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[0]; \ }) #define HEXAGON_V32_GET_UB1(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[1]; \ }) #define HEXAGON_V32_GET_UB2(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[2]; \ }) #define HEXAGON_V32_GET_UB3(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.ub[3]; \ }) /* NOTE: All set macros return a HEXAGON_Vect32 type */ /* Set word macro */ #define HEXAGON_V32_PUT_W(v, new) (new) /* Set half word macros */ #ifdef __hexagon__ #define HEXAGON_V32_PUT_H0(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[0] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_H1(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.h[1] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_PUT_H0(v, new) \ (((v) & 0xffff0000) | ((HEXAGON_Vect32)((unsigned short)(new)))) #define HEXAGON_V32_PUT_H1(v, new) (((v) & 0x0000ffff) | (((HEXAGON_Vect32)(new)) << 16)) #endif /* 
!__hexagon__ */ /* Set byte macros */ #ifdef __hexagon__ #define HEXAGON_V32_PUT_B0(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[0] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_B1(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[1] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_B2(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[2] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #define HEXAGON_V32_PUT_B3(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.w = (v); \ _HEXAGON_V32_internal_union.b[3] = (new); \ _HEXAGON_V32_internal_union.w; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_PUT_B0(v, new) \ (((v) & 0xffffff00) | ((HEXAGON_Vect32)((unsigned char)(new)))) #define HEXAGON_V32_PUT_B1(v, new) \ (((v) & 0xffff00ff) | (((HEXAGON_Vect32)((unsigned char)(new))) << 8)) #define HEXAGON_V32_PUT_B2(v, new) \ (((v) & 0xff00ffff) | (((HEXAGON_Vect32)((unsigned char)(new))) << 16)) #define HEXAGON_V32_PUT_B3(v, new) (((v) & 0x00ffffff) | (((HEXAGON_Vect32)(new)) << 24)) #endif /* !__hexagon__ */ /* NOTE: All create macros return a HEXAGON_Vect32 type */ /* Create from a word */ #define HEXAGON_V32_CREATE_W(w) (w) /* Create from half words */ #ifdef __hexagon__ #define HEXAGON_V32_CREATE_H(h1, h0) \ __extension__({ \ union { \ long long d; \ short h[2]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.h[0] = (h0); \ _HEXAGON_V32_internal_union.h[1] = (h1); \ _HEXAGON_V32_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_CREATE_H(h1, h0) \ ((((HEXAGON_Vect32)(h1)) << 16) | 
((HEXAGON_Vect32)((h0) & 0xffff))) #endif /* !__hexagon__ */ /* Create from bytes */ #ifdef __hexagon__ #define HEXAGON_V32_CREATE_B(b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[4]; \ } _HEXAGON_V32_internal_union; \ _HEXAGON_V32_internal_union.b[0] = (b0); \ _HEXAGON_V32_internal_union.b[1] = (b1); \ _HEXAGON_V32_internal_union.b[2] = (b2); \ _HEXAGON_V32_internal_union.b[3] = (b3); \ _HEXAGON_V32_internal_union.d; \ }) #else /* !__hexagon__ */ #define HEXAGON_V32_CREATE_B(b3, b2, b1, b0) \ ((((HEXAGON_Vect32)(b3)) << 24) | (((HEXAGON_Vect32)((b2) & 0xff)) << 16) | \ (((HEXAGON_Vect32)((b1) & 0xff)) << 8) | ((HEXAGON_Vect32)((b0) & 0xff))) #endif /* !__hexagon__ */ #ifdef __cplusplus class HEXAGON_Vect32C { public: // Constructors HEXAGON_Vect32C(int w = 0) : data(w) {}; HEXAGON_Vect32C(short h1, short h0) : data(HEXAGON_V32_CREATE_H(h1, h0)) {}; HEXAGON_Vect32C(signed char b3, signed char b2, signed char b1, signed char b0) : data(HEXAGON_V32_CREATE_B(b3, b2, b1, b0)) {}; HEXAGON_Vect32C(const HEXAGON_Vect32C &v) : data(v.data) {}; HEXAGON_Vect32C &operator=(const HEXAGON_Vect32C &v) { data = v.data; return *this; }; operator int() { return data; }; // Extract word methods int W(void) { return HEXAGON_V32_GET_W(data); }; unsigned int UW(void) { return HEXAGON_V32_GET_UW(data); }; // Extract half word methods short H0(void) { return HEXAGON_V32_GET_H0(data); }; short H1(void) { return HEXAGON_V32_GET_H1(data); }; unsigned short UH0(void) { return HEXAGON_V32_GET_UH0(data); }; unsigned short UH1(void) { return HEXAGON_V32_GET_UH1(data); }; // Extract byte methods signed char B0(void) { return HEXAGON_V32_GET_B0(data); }; signed char B1(void) { return HEXAGON_V32_GET_B1(data); }; signed char B2(void) { return HEXAGON_V32_GET_B2(data); }; signed char B3(void) { return HEXAGON_V32_GET_B3(data); }; unsigned char UB0(void) { return HEXAGON_V32_GET_UB0(data); }; unsigned char UB1(void) { return HEXAGON_V32_GET_UB1(data); }; unsigned char UB2(void) 
{ return HEXAGON_V32_GET_UB2(data); }; unsigned char UB3(void) { return HEXAGON_V32_GET_UB3(data); }; // NOTE: All set methods return a HEXAGON_Vect32C type // Set word method HEXAGON_Vect32C W(int w) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_W(data, w)); }; // Set half word methods HEXAGON_Vect32C H0(short h) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_H0(data, h)); }; HEXAGON_Vect32C H1(short h) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_H1(data, h)); }; // Set byte methods HEXAGON_Vect32C B0(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B0(data, b)); }; HEXAGON_Vect32C B1(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B1(data, b)); }; HEXAGON_Vect32C B2(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B2(data, b)); }; HEXAGON_Vect32C B3(signed char b) { return HEXAGON_Vect32C(HEXAGON_V32_PUT_B3(data, b)); }; private: int data; }; #endif /* __cplusplus */ // V65 Vector types #if __HVX_ARCH__ >= 65 #if defined __HVX__ && (__HVX_LENGTH__ == 128) typedef long HEXAGON_VecPred128 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256))) __attribute__((aligned(256))); typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(4))); typedef long HEXAGON_UVect2048 __attribute__((__vector_size__(256))) __attribute__((aligned(4))); #define HVX_VectorPred HEXAGON_VecPred128 #define HVX_Vector HEXAGON_Vect1024 #define HVX_VectorPair HEXAGON_Vect2048 #define HVX_UVector HEXAGON_UVect1024 #define HVX_UVectorPair HEXAGON_UVect2048 #else /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #if defined __HVX__ && (__HVX_LENGTH__ == 64) typedef long HEXAGON_VecPred64 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long HEXAGON_Vect512 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long HEXAGON_Vect1024 
__attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long HEXAGON_UVect512 __attribute__((__vector_size__(64))) __attribute__((aligned(4))); typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(4))); #define HVX_VectorPred HEXAGON_VecPred64 #define HVX_Vector HEXAGON_Vect512 #define HVX_VectorPair HEXAGON_Vect1024 #define HVX_UVector HEXAGON_UVect512 #define HVX_UVectorPair HEXAGON_UVect1024 #endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ #endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #endif /* __HVX_ARCH__ >= 65 */ /* Predicates */ typedef int HEXAGON_Pred; /*** *** backward compatibility aliases ***/ /* Old names */ #define Q6Vect Q6Vect64 #define Q6V_GET_D Q6V64_GET_D #define Q6V_GET_UD Q6V64_GET_UD #define Q6V_GET_W0 Q6V64_GET_W0 #define Q6V_GET_W1 Q6V64_GET_W1 #define Q6V_GET_UW0 Q6V64_GET_UW0 #define Q6V_GET_UW1 Q6V64_GET_UW1 #define Q6V_GET_H0 Q6V64_GET_H0 #define Q6V_GET_H1 Q6V64_GET_H1 #define Q6V_GET_H2 Q6V64_GET_H2 #define Q6V_GET_H3 Q6V64_GET_H3 #define Q6V_GET_UH0 Q6V64_GET_UH0 #define Q6V_GET_UH1 Q6V64_GET_UH1 #define Q6V_GET_UH2 Q6V64_GET_UH2 #define Q6V_GET_UH3 Q6V64_GET_UH3 #define Q6V_GET_B0 Q6V64_GET_B0 #define Q6V_GET_B1 Q6V64_GET_B1 #define Q6V_GET_B2 Q6V64_GET_B2 #define Q6V_GET_B3 Q6V64_GET_B3 #define Q6V_GET_B4 Q6V64_GET_B4 #define Q6V_GET_B5 Q6V64_GET_B5 #define Q6V_GET_B6 Q6V64_GET_B6 #define Q6V_GET_B7 Q6V64_GET_B7 #define Q6V_GET_UB0 Q6V64_GET_UB0 #define Q6V_GET_UB1 Q6V64_GET_UB1 #define Q6V_GET_UB2 Q6V64_GET_UB2 #define Q6V_GET_UB3 Q6V64_GET_UB3 #define Q6V_GET_UB4 Q6V64_GET_UB4 #define Q6V_GET_UB5 Q6V64_GET_UB5 #define Q6V_GET_UB6 Q6V64_GET_UB6 #define Q6V_GET_UB7 Q6V64_GET_UB7 #define Q6V_PUT_D Q6V64_PUT_D #define Q6V_PUT_W0 Q6V64_PUT_W0 #define Q6V_PUT_W1 Q6V64_PUT_W1 #define Q6V_PUT_H0 Q6V64_PUT_H0 #define Q6V_PUT_H1 Q6V64_PUT_H1 #define Q6V_PUT_H2 Q6V64_PUT_H2 #define Q6V_PUT_H3 Q6V64_PUT_H3 #define Q6V_PUT_B0 Q6V64_PUT_B0 #define Q6V_PUT_B1 
Q6V64_PUT_B1 #define Q6V_PUT_B2 Q6V64_PUT_B2 #define Q6V_PUT_B3 Q6V64_PUT_B3 #define Q6V_PUT_B4 Q6V64_PUT_B4 #define Q6V_PUT_B5 Q6V64_PUT_B5 #define Q6V_PUT_B6 Q6V64_PUT_B6 #define Q6V_PUT_B7 Q6V64_PUT_B7 #define Q6V_CREATE_D Q6V64_CREATE_D #define Q6V_CREATE_W Q6V64_CREATE_W #define Q6V_CREATE_H Q6V64_CREATE_H #define Q6V_CREATE_B Q6V64_CREATE_B #ifdef __cplusplus #define Q6VectC Q6Vect64C #endif /* __cplusplus */ /* 64 Bit Vectors */ typedef long long __attribute__((__may_alias__)) Q6Vect64; /* Extract doubleword macros */ #define Q6V64_GET_D(v) (v) #define Q6V64_GET_UD(v) ((unsigned long long)(v)) /* Extract word macros */ #define Q6V64_GET_W0(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[0]; \ }) #define Q6V64_GET_W1(v) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[1]; \ }) #define Q6V64_GET_UW0(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uw[0]; \ }) #define Q6V64_GET_UW1(v) \ __extension__({ \ union { \ long long d; \ unsigned int uw[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uw[1]; \ }) /* Extract half word macros */ #define Q6V64_GET_H0(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[0]; \ }) #define Q6V64_GET_H1(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[1]; \ }) #define Q6V64_GET_H2(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[2]; \ }) #define Q6V64_GET_H3(v) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } 
_Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[3]; \ }) #define Q6V64_GET_UH0(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[0]; \ }) #define Q6V64_GET_UH1(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[1]; \ }) #define Q6V64_GET_UH2(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[2]; \ }) #define Q6V64_GET_UH3(v) \ __extension__({ \ union { \ long long d; \ unsigned short uh[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.uh[3]; \ }) /* Extract byte macros */ #define Q6V64_GET_B0(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[0]; \ }) #define Q6V64_GET_B1(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[1]; \ }) #define Q6V64_GET_B2(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[2]; \ }) #define Q6V64_GET_B3(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[3]; \ }) #define Q6V64_GET_B4(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[4]; \ }) #define Q6V64_GET_B5(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[5]; \ }) #define Q6V64_GET_B6(v) \ 
__extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[6]; \ }) #define Q6V64_GET_B7(v) \ __extension__({ \ union { \ long long d; \ signed char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[7]; \ }) #define Q6V64_GET_UB0(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[0]; \ }) #define Q6V64_GET_UB1(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[1]; \ }) #define Q6V64_GET_UB2(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[2]; \ }) #define Q6V64_GET_UB3(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[3]; \ }) #define Q6V64_GET_UB4(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[4]; \ }) #define Q6V64_GET_UB5(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[5]; \ }) #define Q6V64_GET_UB6(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[6]; \ }) #define Q6V64_GET_UB7(v) \ __extension__({ \ union { \ long long d; \ unsigned char ub[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.ub[7]; \ }) /* NOTE: All set macros return a Q6Vect64 type */ /* Set doubleword macro */ #define Q6V64_PUT_D(v, new) (new) /* Set word macros */ #ifdef __qdsp6__ #define 
Q6V64_PUT_W0(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[0] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_W1(v, new) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.w[1] = (new); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_PUT_W0(v, new) \ (((v) & 0xffffffff00000000LL) | ((Q6Vect64)((unsigned int)(new)))) #define Q6V64_PUT_W1(v, new) \ (((v) & 0x00000000ffffffffLL) | (((Q6Vect64)(new)) << 32LL)) #endif /* !__qdsp6__ */ /* Set half word macros */ #ifdef __qdsp6__ #define Q6V64_PUT_H0(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[0] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_H1(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[1] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_H2(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[2] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_H3(v, new) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.h[3] = (new); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_PUT_H0(v, new) \ (((v) & 0xffffffffffff0000LL) | ((Q6Vect64)((unsigned short)(new)))) #define Q6V64_PUT_H1(v, new) \ (((v) & 0xffffffff0000ffffLL) | (((Q6Vect64)((unsigned short)(new))) << 16LL)) #define Q6V64_PUT_H2(v, new) \ (((v) & 0xffff0000ffffffffLL) | (((Q6Vect64)((unsigned short)(new))) << 32LL)) #define Q6V64_PUT_H3(v, new) \ (((v) & 0x0000ffffffffffffLL) | (((Q6Vect64)(new)) << 48LL)) 
#endif /* !__qdsp6__ */ /* Set byte macros */ #ifdef __qdsp6__ #define Q6V64_PUT_B0(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[0] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B1(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[1] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B2(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[2] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B3(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[3] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B4(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[4] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B5(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[5] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B6(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[6] = (new); \ _Q6V64_internal_union.d; \ }) #define Q6V64_PUT_B7(v, new) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.d = (v); \ _Q6V64_internal_union.b[7] = (new); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_PUT_B0(v, new) \ (((v) & 0xffffffffffffff00LL) | ((Q6Vect64)((unsigned char)(new)))) #define Q6V64_PUT_B1(v, new) \ (((v) & 0xffffffffffff00ffLL) | (((Q6Vect64)((unsigned 
char)(new))) << 8LL)) #define Q6V64_PUT_B2(v, new) \ (((v) & 0xffffffffff00ffffLL) | (((Q6Vect64)((unsigned char)(new))) << 16LL)) #define Q6V64_PUT_B3(v, new) \ (((v) & 0xffffffff00ffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 24LL)) #define Q6V64_PUT_B4(v, new) \ (((v) & 0xffffff00ffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 32LL)) #define Q6V64_PUT_B5(v, new) \ (((v) & 0xffff00ffffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 40LL)) #define Q6V64_PUT_B6(v, new) \ (((v) & 0xff00ffffffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 48LL)) #define Q6V64_PUT_B7(v, new) \ (((v) & 0x00ffffffffffffffLL) | (((Q6Vect64)(new)) << 56LL)) #endif /* !__qdsp6__ */ /* NOTE: All create macros return a Q6Vect64 type */ /* Create from a doubleword */ #define Q6V64_CREATE_D(d) (d) /* Create from words */ #ifdef __qdsp6__ #define Q6V64_CREATE_W(w1, w0) \ __extension__({ \ union { \ long long d; \ int w[2]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.w[0] = (w0); \ _Q6V64_internal_union.w[1] = (w1); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_CREATE_W(w1, w0) \ ((((Q6Vect64)(w1)) << 32LL) | ((Q6Vect64)((w0) & 0xffffffff))) #endif /* !__qdsp6__ */ /* Create from half words */ #ifdef __qdsp6__ #define Q6V64_CREATE_H(h3, h2, h1, h0) \ __extension__({ \ union { \ long long d; \ short h[4]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.h[0] = (h0); \ _Q6V64_internal_union.h[1] = (h1); \ _Q6V64_internal_union.h[2] = (h2); \ _Q6V64_internal_union.h[3] = (h3); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_CREATE_H(h3, h2, h1, h0) \ ((((Q6Vect64)(h3)) << 48LL) | (((Q6Vect64)((h2) & 0xffff)) << 32LL) | \ (((Q6Vect64)((h1) & 0xffff)) << 16LL) | ((Q6Vect64)((h0) & 0xffff))) #endif /* !__qdsp6__ */ /* Create from bytes */ #ifdef __qdsp6__ #define Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[8]; \ } _Q6V64_internal_union; \ _Q6V64_internal_union.b[0] = (b0); \ 
_Q6V64_internal_union.b[1] = (b1); \ _Q6V64_internal_union.b[2] = (b2); \ _Q6V64_internal_union.b[3] = (b3); \ _Q6V64_internal_union.b[4] = (b4); \ _Q6V64_internal_union.b[5] = (b5); \ _Q6V64_internal_union.b[6] = (b6); \ _Q6V64_internal_union.b[7] = (b7); \ _Q6V64_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ ((((Q6Vect64)(b7)) << 56LL) | (((Q6Vect64)((b6) & 0xff)) << 48LL) | \ (((Q6Vect64)((b5) & 0xff)) << 40LL) | (((Q6Vect64)((b4) & 0xff)) << 32LL) | \ (((Q6Vect64)((b3) & 0xff)) << 24LL) | (((Q6Vect64)((b2) & 0xff)) << 16LL) | \ (((Q6Vect64)((b1) & 0xff)) << 8LL) | ((Q6Vect64)((b0) & 0xff))) #endif /* !__qdsp6__ */ #ifdef __cplusplus class Q6Vect64C { public: // Constructors Q6Vect64C(long long d = 0) : data(d) {}; Q6Vect64C(int w1, int w0) : data(Q6V64_CREATE_W(w1, w0)) {}; Q6Vect64C(short h3, short h2, short h1, short h0) : data(Q6V64_CREATE_H(h3, h2, h1, h0)) {}; Q6Vect64C(signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0) : data(Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0)) {}; Q6Vect64C(const Q6Vect64C &v) : data(v.data) {}; Q6Vect64C &operator=(const Q6Vect64C &v) { data = v.data; return *this; }; operator long long() { return data; }; // Extract doubleword methods long long D(void) { return Q6V64_GET_D(data); }; unsigned long long UD(void) { return Q6V64_GET_UD(data); }; // Extract word methods int W0(void) { return Q6V64_GET_W0(data); }; int W1(void) { return Q6V64_GET_W1(data); }; unsigned int UW0(void) { return Q6V64_GET_UW0(data); }; unsigned int UW1(void) { return Q6V64_GET_UW1(data); }; // Extract half word methods short H0(void) { return Q6V64_GET_H0(data); }; short H1(void) { return Q6V64_GET_H1(data); }; short H2(void) { return Q6V64_GET_H2(data); }; short H3(void) { return Q6V64_GET_H3(data); }; unsigned short UH0(void) { return Q6V64_GET_UH0(data); }; unsigned short UH1(void) { return Q6V64_GET_UH1(data); }; 
unsigned short UH2(void) { return Q6V64_GET_UH2(data); }; unsigned short UH3(void) { return Q6V64_GET_UH3(data); }; // Extract byte methods signed char B0(void) { return Q6V64_GET_B0(data); }; signed char B1(void) { return Q6V64_GET_B1(data); }; signed char B2(void) { return Q6V64_GET_B2(data); }; signed char B3(void) { return Q6V64_GET_B3(data); }; signed char B4(void) { return Q6V64_GET_B4(data); }; signed char B5(void) { return Q6V64_GET_B5(data); }; signed char B6(void) { return Q6V64_GET_B6(data); }; signed char B7(void) { return Q6V64_GET_B7(data); }; unsigned char UB0(void) { return Q6V64_GET_UB0(data); }; unsigned char UB1(void) { return Q6V64_GET_UB1(data); }; unsigned char UB2(void) { return Q6V64_GET_UB2(data); }; unsigned char UB3(void) { return Q6V64_GET_UB3(data); }; unsigned char UB4(void) { return Q6V64_GET_UB4(data); }; unsigned char UB5(void) { return Q6V64_GET_UB5(data); }; unsigned char UB6(void) { return Q6V64_GET_UB6(data); }; unsigned char UB7(void) { return Q6V64_GET_UB7(data); }; // NOTE: All set methods return a Q6Vect64C type // Set doubleword method Q6Vect64C D(long long d) { return Q6Vect64C(Q6V64_PUT_D(data, d)); }; // Set word methods Q6Vect64C W0(int w) { return Q6Vect64C(Q6V64_PUT_W0(data, w)); }; Q6Vect64C W1(int w) { return Q6Vect64C(Q6V64_PUT_W1(data, w)); }; // Set half word methods Q6Vect64C H0(short h) { return Q6Vect64C(Q6V64_PUT_H0(data, h)); }; Q6Vect64C H1(short h) { return Q6Vect64C(Q6V64_PUT_H1(data, h)); }; Q6Vect64C H2(short h) { return Q6Vect64C(Q6V64_PUT_H2(data, h)); }; Q6Vect64C H3(short h) { return Q6Vect64C(Q6V64_PUT_H3(data, h)); }; // Set byte methods Q6Vect64C B0(signed char b) { return Q6Vect64C(Q6V64_PUT_B0(data, b)); }; Q6Vect64C B1(signed char b) { return Q6Vect64C(Q6V64_PUT_B1(data, b)); }; Q6Vect64C B2(signed char b) { return Q6Vect64C(Q6V64_PUT_B2(data, b)); }; Q6Vect64C B3(signed char b) { return Q6Vect64C(Q6V64_PUT_B3(data, b)); }; Q6Vect64C B4(signed char b) { return Q6Vect64C(Q6V64_PUT_B4(data, b)); 
}; Q6Vect64C B5(signed char b) { return Q6Vect64C(Q6V64_PUT_B5(data, b)); }; Q6Vect64C B6(signed char b) { return Q6Vect64C(Q6V64_PUT_B6(data, b)); }; Q6Vect64C B7(signed char b) { return Q6Vect64C(Q6V64_PUT_B7(data, b)); }; private: long long data; }; #endif /* __cplusplus */ /* 32 Bit Vectors */ typedef int Q6Vect32; /* Extract word macros */ #define Q6V32_GET_W(v) (v) #define Q6V32_GET_UW(v) ((unsigned int)(v)) /* Extract half word macros */ #define Q6V32_GET_H0(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[0]; \ }) #define Q6V32_GET_H1(v) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[1]; \ }) #define Q6V32_GET_UH0(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.uh[0]; \ }) #define Q6V32_GET_UH1(v) \ __extension__({ \ union { \ int w; \ unsigned short uh[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.uh[1]; \ }) /* Extract byte macros */ #define Q6V32_GET_B0(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[0]; \ }) #define Q6V32_GET_B1(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[1]; \ }) #define Q6V32_GET_B2(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[2]; \ }) #define Q6V32_GET_B3(v) \ __extension__({ \ union { \ int w; \ signed char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[3]; \ }) #define Q6V32_GET_UB0(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ 
_Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[0]; \ }) #define Q6V32_GET_UB1(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[1]; \ }) #define Q6V32_GET_UB2(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[2]; \ }) #define Q6V32_GET_UB3(v) \ __extension__({ \ union { \ int w; \ unsigned char ub[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.ub[3]; \ }) /* NOTE: All set macros return a Q6Vect32 type */ /* Set word macro */ #define Q6V32_PUT_W(v, new) (new) /* Set half word macros */ #ifdef __qdsp6__ #define Q6V32_PUT_H0(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[0] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_H1(v, new) \ __extension__({ \ union { \ int w; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.h[1] = (new); \ _Q6V32_internal_union.w; \ }) #else /* !__qdsp6__ */ #define Q6V32_PUT_H0(v, new) \ (((v) & 0xffff0000) | ((Q6Vect32)((unsigned short)(new)))) #define Q6V32_PUT_H1(v, new) (((v) & 0x0000ffff) | (((Q6Vect32)(new)) << 16)) #endif /* !__qdsp6__ */ /* Set byte macros */ #ifdef __qdsp6__ #define Q6V32_PUT_B0(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[0] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_B1(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[1] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_B2(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ 
_Q6V32_internal_union.b[2] = (new); \ _Q6V32_internal_union.w; \ }) #define Q6V32_PUT_B3(v, new) \ __extension__({ \ union { \ int w; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.w = (v); \ _Q6V32_internal_union.b[3] = (new); \ _Q6V32_internal_union.w; \ }) #else /* !__qdsp6__ */ #define Q6V32_PUT_B0(v, new) \ (((v) & 0xffffff00) | ((Q6Vect32)((unsigned char)(new)))) #define Q6V32_PUT_B1(v, new) \ (((v) & 0xffff00ff) | (((Q6Vect32)((unsigned char)(new))) << 8)) #define Q6V32_PUT_B2(v, new) \ (((v) & 0xff00ffff) | (((Q6Vect32)((unsigned char)(new))) << 16)) #define Q6V32_PUT_B3(v, new) (((v) & 0x00ffffff) | (((Q6Vect32)(new)) << 24)) #endif /* !__qdsp6__ */ /* NOTE: All create macros return a Q6Vect32 type */ /* Create from a word */ #define Q6V32_CREATE_W(w) (w) /* Create from half words */ #ifdef __qdsp6__ #define Q6V32_CREATE_H(h1, h0) \ __extension__({ \ union { \ long long d; \ short h[2]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.h[0] = (h0); \ _Q6V32_internal_union.h[1] = (h1); \ _Q6V32_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V32_CREATE_H(h1, h0) \ ((((Q6Vect32)(h1)) << 16) | ((Q6Vect32)((h0) & 0xffff))) #endif /* !__qdsp6__ */ /* Create from bytes */ #ifdef __qdsp6__ #define Q6V32_CREATE_B(b3, b2, b1, b0) \ __extension__({ \ union { \ long long d; \ char b[4]; \ } _Q6V32_internal_union; \ _Q6V32_internal_union.b[0] = (b0); \ _Q6V32_internal_union.b[1] = (b1); \ _Q6V32_internal_union.b[2] = (b2); \ _Q6V32_internal_union.b[3] = (b3); \ _Q6V32_internal_union.d; \ }) #else /* !__qdsp6__ */ #define Q6V32_CREATE_B(b3, b2, b1, b0) \ ((((Q6Vect32)(b3)) << 24) | (((Q6Vect32)((b2) & 0xff)) << 16) | \ (((Q6Vect32)((b1) & 0xff)) << 8) | ((Q6Vect32)((b0) & 0xff))) #endif /* !__qdsp6__ */ #ifdef __cplusplus class Q6Vect32C { public: // Constructors Q6Vect32C(int w = 0) : data(w) {}; Q6Vect32C(short h1, short h0) : data(Q6V32_CREATE_H(h1, h0)) {}; Q6Vect32C(signed char b3, signed char b2, signed char b1, signed char b0) : 
data(Q6V32_CREATE_B(b3, b2, b1, b0)) {}; Q6Vect32C(const Q6Vect32C &v) : data(v.data) {}; Q6Vect32C &operator=(const Q6Vect32C &v) { data = v.data; return *this; }; operator int() { return data; }; // Extract word methods int W(void) { return Q6V32_GET_W(data); }; unsigned int UW(void) { return Q6V32_GET_UW(data); }; // Extract half word methods short H0(void) { return Q6V32_GET_H0(data); }; short H1(void) { return Q6V32_GET_H1(data); }; unsigned short UH0(void) { return Q6V32_GET_UH0(data); }; unsigned short UH1(void) { return Q6V32_GET_UH1(data); }; // Extract byte methods signed char B0(void) { return Q6V32_GET_B0(data); }; signed char B1(void) { return Q6V32_GET_B1(data); }; signed char B2(void) { return Q6V32_GET_B2(data); }; signed char B3(void) { return Q6V32_GET_B3(data); }; unsigned char UB0(void) { return Q6V32_GET_UB0(data); }; unsigned char UB1(void) { return Q6V32_GET_UB1(data); }; unsigned char UB2(void) { return Q6V32_GET_UB2(data); }; unsigned char UB3(void) { return Q6V32_GET_UB3(data); }; // NOTE: All set methods return a Q6Vect32C type // Set word method Q6Vect32C W(int w) { return Q6Vect32C(Q6V32_PUT_W(data, w)); }; // Set half word methods Q6Vect32C H0(short h) { return Q6Vect32C(Q6V32_PUT_H0(data, h)); }; Q6Vect32C H1(short h) { return Q6Vect32C(Q6V32_PUT_H1(data, h)); }; // Set byte methods Q6Vect32C B0(signed char b) { return Q6Vect32C(Q6V32_PUT_B0(data, b)); }; Q6Vect32C B1(signed char b) { return Q6Vect32C(Q6V32_PUT_B1(data, b)); }; Q6Vect32C B2(signed char b) { return Q6Vect32C(Q6V32_PUT_B2(data, b)); }; Q6Vect32C B3(signed char b) { return Q6Vect32C(Q6V32_PUT_B3(data, b)); }; private: int data; }; #endif /* __cplusplus */ // V65 Vector types #if __HVX_ARCH__ >= 65 #if defined __HVX__ && (__HVX_LENGTH__ == 128) typedef long Q6VecPred128 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long Q6Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); typedef long Q6Vect2048 
__attribute__((__vector_size__(256))) __attribute__((aligned(256))); #else /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #if defined __HVX__ && (__HVX_LENGTH__ == 64) typedef long Q6VecPred64 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long Q6Vect512 __attribute__((__vector_size__(64))) __attribute__((aligned(64))); typedef long Q6Vect1024 __attribute__((__vector_size__(128))) __attribute__((aligned(128))); #endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ #endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #endif /* __HVX_ARCH__ >= 65 */ /* Predicates */ typedef int Q6Pred; #ifdef __HVX__ // Extract HVX VectorPair macro. #define HEXAGON_HVX_GET_W(v) (v) // Extract HVX Vector macros. #define HEXAGON_HVX_GET_V0(v) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[0]; \ }) #define HEXAGON_HVX_GET_V1(v) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[1]; \ }) #define HEXAGON_HVX_GET_P(v) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_VectorPred P[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.P[0]; \ }) // Set HVX VectorPair macro. #define HEXAGON_HVX_PUT_W(v, new) (new) // Set HVX Vector macros. 
#define HEXAGON_HVX_PUT_V0(v, new) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[0] = (new); \ _HEXAGON_HVX_internal_union.W; \ }) #define HEXAGON_HVX_PUT_V1(v, new) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.V[1] = (new); \ _HEXAGON_HVX_internal_union.W; \ }) #define HEXAGON_HVX_PUT_P(v, new) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_VectorPred P[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.W = (v); \ _HEXAGON_HVX_internal_union.P[0] = (new); \ _HEXAGON_HVX_internal_union.W; \ }) #define HEXAGON_HVX_CREATE_W(v1, v0) \ __extension__({ \ union { \ HVX_VectorPair W; \ HVX_Vector V[2]; \ } _HEXAGON_HVX_internal_union; \ _HEXAGON_HVX_internal_union.V[0] = (v0); \ _HEXAGON_HVX_internal_union.V[1] = (v1); \ _HEXAGON_HVX_internal_union.W; \ }) #ifdef __cplusplus class HVX_Vect { public: // Constructors. // Default. HVX_Vect() : data(Q6_W_vcombine_VV(Q6_V_vzero(), Q6_V_vzero())){}; // Custom constructors. HVX_Vect(HVX_VectorPair W) : data(W){}; HVX_Vect(HVX_Vector v1, HVX_Vector v0) : data(HEXAGON_HVX_CREATE_W(v1, v0)){}; // Copy constructor. HVX_Vect(const HVX_Vect &W) = default; // Move constructor. HVX_Vect(HVX_Vect &&W) = default; // Assignment operator. HVX_Vect &operator=(const HVX_Vect &W) = default; operator HVX_VectorPair() { return data; }; // Extract VectorPair method. HVX_VectorPair W(void) { return HEXAGON_HVX_GET_W(data); }; // Extract Vector methods. HVX_Vector V0(void) { return HEXAGON_HVX_GET_V0(data); }; HVX_Vector V1(void) { return HEXAGON_HVX_GET_V1(data); }; HVX_VectorPred P(void) { return HEXAGON_HVX_GET_P(data); }; // NOTE: All set methods return a HVX_Vect type. // Set HVX VectorPair method. 
HVX_Vect W(HVX_VectorPair w) { return HVX_Vect(HEXAGON_HVX_PUT_W(data, w)); }; // Set HVX Vector methods. HVX_Vect V0(HVX_Vector v) { return HVX_Vect(HEXAGON_HVX_PUT_V0(data, v)); }; HVX_Vect V1(HVX_Vector v) { return HVX_Vect(HEXAGON_HVX_PUT_V1(data, v)); }; HVX_Vect P(HVX_VectorPred p) { return HVX_Vect(HEXAGON_HVX_PUT_P(data, p)); }; private: HVX_VectorPair data; }; #endif /* __cplusplus */ #endif /* __HVX__ */ #define HEXAGON_UDMA_DM0_STATUS_IDLE 0x00000000 #define HEXAGON_UDMA_DM0_STATUS_RUN 0x00000001 #define HEXAGON_UDMA_DM0_STATUS_ERROR 0x00000002 #define HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE 0 #define HEXAGON_UDMA_DESC_DSTATE_COMPLETE 1 #define HEXAGON_UDMA_DESC_ORDER_NOORDER 0 #define HEXAGON_UDMA_DESC_ORDER_ORDER 1 #define HEXAGON_UDMA_DESC_BYPASS_OFF 0 #define HEXAGON_UDMA_DESC_BYPASS_ON 1 #define HEXAGON_UDMA_DESC_COMP_NONE 0 #define HEXAGON_UDMA_DESC_COMP_DLBC 1 #define HEXAGON_UDMA_DESC_DESCTYPE_TYPE0 0 #define HEXAGON_UDMA_DESC_DESCTYPE_TYPE1 1 typedef struct hexagon_udma_descriptor_type0_s { void *next; unsigned int length:24; unsigned int desctype:2; unsigned int dstcomp:1; unsigned int srccomp:1; unsigned int dstbypass:1; unsigned int srcbypass:1; unsigned int order:1; unsigned int dstate:1; void *src; void *dst; } hexagon_udma_descriptor_type0_t; typedef struct hexagon_udma_descriptor_type1_s { void *next; unsigned int length:24; unsigned int desctype:2; unsigned int dstcomp:1; unsigned int srccomp:1; unsigned int dstbypass:1; unsigned int srcbypass:1; unsigned int order:1; unsigned int dstate:1; void *src; void *dst; unsigned int allocation:28; unsigned int padding:4; unsigned int roiwidth:16; unsigned int roiheight:16; unsigned int srcstride:16; unsigned int dststride:16; unsigned int srcwidthoffset:16; unsigned int dstwidthoffset:16; } hexagon_udma_descriptor_type1_t; #endif /* !HEXAGON_TYPES_H */ /*===---------------- hresetintrin.h - HRESET intrinsics -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." #endif #ifndef __HRESETINTRIN_H #define __HRESETINTRIN_H #if __has_extension(gnu_asm) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("hreset"))) /// Provides a hint to the processor to selectively reset the prediction /// history of the current logical processor specified by a 32-bit integer /// value \a __eax. /// /// This intrinsic corresponds to the HRESET instruction. /// /// \code{.operation} /// IF __eax == 0 /// // nop /// ELSE /// FOR i := 0 to 31 /// IF __eax[i] /// ResetPredictionFeature(i) /// FI /// ENDFOR /// FI /// \endcode static __inline void __DEFAULT_FN_ATTRS _hreset(int __eax) { __asm__ ("hreset $0" :: "a"(__eax)); } #undef __DEFAULT_FN_ATTRS #endif /* __has_extension(gnu_asm) */ #endif /* __HRESETINTRIN_H */ /*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __HTMINTRIN_H #define __HTMINTRIN_H #ifndef __HTM__ #error "HTM instruction set not enabled" #endif #ifdef __powerpc__ #include typedef uint64_t texasr_t; typedef uint32_t texasru_t; typedef uint32_t texasrl_t; typedef uintptr_t tfiar_t; typedef uintptr_t tfhar_t; #define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3) #define _HTM_NONTRANSACTIONAL 0x0 #define _HTM_SUSPENDED 0x1 #define _HTM_TRANSACTIONAL 0x2 #define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1)) #define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \ (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1)) #define _TEXASR_FAILURE_CODE(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 7, 8) #define _TEXASRU_FAILURE_CODE(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8) #define _TEXASR_FAILURE_PERSISTENT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 7, 1) #define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1) #define _TEXASR_DISALLOWED(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 8, 1) #define _TEXASRU_DISALLOWED(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1) #define _TEXASR_NESTING_OVERFLOW(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 9, 1) #define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1) #define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 10, 1) #define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1) #define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 11, 1) #define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1) #define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 12, 1) #define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1) #define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 13, 1) #define 
_TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1) #define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 14, 1) #define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1) #define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 15, 1) #define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1) #define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 16, 1) #define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1) #define _TEXASR_ABORT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 31, 1) #define _TEXASRU_ABORT(TEXASRU) \ _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1) #define _TEXASR_SUSPENDED(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 32, 1) #define _TEXASR_PRIVILEGE(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 35, 2) #define _TEXASR_FAILURE_SUMMARY(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 36, 1) #define _TEXASR_TFIAR_EXACT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 37, 1) #define _TEXASR_ROT(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 38, 1) #define _TEXASR_TRANSACTION_LEVEL(TEXASR) \ _TEXASR_EXTRACT_BITS(TEXASR, 63, 12) #endif /* __powerpc */ #ifdef __s390__ /* Condition codes generated by tbegin */ #define _HTM_TBEGIN_STARTED 0 #define _HTM_TBEGIN_INDETERMINATE 1 #define _HTM_TBEGIN_TRANSIENT 2 #define _HTM_TBEGIN_PERSISTENT 3 /* The abort codes below this threshold are reserved for machine use. */ #define _HTM_FIRST_USER_ABORT_CODE 256 /* The transaction diagnostic block is it is defined in the Principles of Operation chapter 5-91. 
*/ struct __htm_tdb { unsigned char format; /* 0 */ unsigned char flags; unsigned char reserved1[4]; unsigned short nesting_depth; unsigned long long abort_code; /* 8 */ unsigned long long conflict_token; /* 16 */ unsigned long long atia; /* 24 */ unsigned char eaid; /* 32 */ unsigned char dxc; unsigned char reserved2[2]; unsigned int program_int_id; unsigned long long exception_id; /* 40 */ unsigned long long bea; /* 48 */ unsigned char reserved3[72]; /* 56 */ unsigned long long gprs[16]; /* 128 */ } __attribute__((__packed__, __aligned__ (8))); /* Helper intrinsics to retry tbegin in case of transient failure. */ static __inline int __attribute__((__always_inline__, __nodebug__)) __builtin_tbegin_retry_null (int __retry) { int cc, i = 0; while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT && i++ < __retry) __builtin_tx_assist(i); return cc; } static __inline int __attribute__((__always_inline__, __nodebug__)) __builtin_tbegin_retry_tdb (void *__tdb, int __retry) { int cc, i = 0; while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT && i++ < __retry) __builtin_tx_assist(i); return cc; } #define __builtin_tbegin_retry(tdb, retry) \ (__builtin_constant_p(tdb == 0) && tdb == 0 ? \ __builtin_tbegin_retry_null(retry) : \ __builtin_tbegin_retry_tdb(tdb, retry)) static __inline int __attribute__((__always_inline__, __nodebug__)) __builtin_tbegin_retry_nofloat_null (int __retry) { int cc, i = 0; while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT && i++ < __retry) __builtin_tx_assist(i); return cc; } static __inline int __attribute__((__always_inline__, __nodebug__)) __builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry) { int cc, i = 0; while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT && i++ < __retry) __builtin_tx_assist(i); return cc; } #define __builtin_tbegin_retry_nofloat(tdb, retry) \ (__builtin_constant_p(tdb == 0) && tdb == 0 ? 
\ __builtin_tbegin_retry_nofloat_null(retry) : \ __builtin_tbegin_retry_nofloat_tdb(tdb, retry)) #endif /* __s390__ */ #endif /* __HTMINTRIN_H */ /*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __HTMXLINTRIN_H #define __HTMXLINTRIN_H #ifndef __HTM__ #error "HTM instruction set not enabled" #endif #include #ifdef __powerpc__ #ifdef __cplusplus extern "C" { #endif #define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0)) #define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0)) #define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4)) #define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8)) typedef char TM_buff_type[16]; /* This macro can be used to determine whether a transaction was successfully started from the __TM_begin() and __TM_simple_begin() intrinsic functions below. 
*/ #define _HTM_TBEGIN_STARTED 1 extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_simple_begin (void) { if (__builtin_expect (__builtin_tbegin (0), 1)) return _HTM_TBEGIN_STARTED; return 0; } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_begin (void* const __TM_buff) { *_TEXASRL_PTR (__TM_buff) = 0; if (__builtin_expect (__builtin_tbegin (0), 1)) return _HTM_TBEGIN_STARTED; #ifdef __powerpc64__ *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr (); #else *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru (); *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr (); #endif *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar (); return 0; } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_end (void) { if (__builtin_expect (__builtin_tend (0), 1)) return 1; return 0; } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_abort (void) { __builtin_tabort (0); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_named_abort (unsigned char const __code) { __builtin_tabort (__code); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_resume (void) { __builtin_tresume (); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_suspend (void) { __builtin_tsuspend (); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_user_abort (void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_ABORT (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); *__code = _TEXASRU_FAILURE_CODE (texasru); return _TEXASRU_ABORT (texasru); } extern __inline long 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_illegal (void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_DISALLOWED (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_footprint_exceeded (void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_FOOTPRINT_OVERFLOW (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_nesting_depth (void* const __TM_buff) { texasrl_t texasrl; if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL) { texasrl = *_TEXASRL_PTR (__TM_buff); if (!_TEXASR_FAILURE_SUMMARY (texasrl)) texasrl = 0; } else texasrl = (texasrl_t) __builtin_get_texasr (); return _TEXASR_TRANSACTION_LEVEL (texasrl); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_nested_too_deep(void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_NESTING_OVERFLOW (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_conflict(void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); /* Return TEXASR bits 11 (Self-Induced Conflict) through 14 (Translation Invalidation Conflict). */ return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 
1 : 0; } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_is_failure_persistent(void* const __TM_buff) { texasru_t texasru = *_TEXASRU_PTR (__TM_buff); return _TEXASRU_FAILURE_PERSISTENT (texasru); } extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_failure_address(void* const __TM_buff) { return *_TFIAR_PTR (__TM_buff); } extern __inline long long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_failure_code(void* const __TM_buff) { return *_TEXASR_PTR (__TM_buff); } #ifdef __cplusplus } #endif #endif /* __powerpc__ */ #ifdef __s390__ #include /* These intrinsics are being made available for compatibility with the IBM XL compiler. For documentation please see the "z/OS XL C/C++ Programming Guide" publicly available on the web. */ static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_simple_begin () { return __builtin_tbegin_nofloat (0); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_begin (void* const __tdb) { return __builtin_tbegin_nofloat (__tdb); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_end () { return __builtin_tend (); } static __inline void __attribute__((__always_inline__)) __TM_abort () { return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE); } static __inline void __attribute__((__always_inline__, __nodebug__)) __TM_named_abort (unsigned char const __code) { return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code); } static __inline void __attribute__((__always_inline__, __nodebug__)) __TM_non_transactional_store (void* const __addr, long long const __value) { __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_nesting_depth (void* const __tdb_ptr) { int depth = __builtin_tx_nesting_depth (); struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; if (depth != 0) 
return depth; if (tdb->format != 1) return 0; return tdb->nesting_depth; } /* Transaction failure diagnostics */ static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_user_abort (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; if (tdb->format != 1) return 0; return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; if (tdb->format != 1) return 0; if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) { *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE; return 1; } return 0; } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_illegal (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return (tdb->format == 1 && (tdb->abort_code == 4 /* unfiltered program interruption */ || tdb->abort_code == 11 /* restricted instruction */)); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_footprint_exceeded (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return (tdb->format == 1 && (tdb->abort_code == 7 /* fetch overflow */ || tdb->abort_code == 8 /* store overflow */)); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_nested_too_deep (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */ } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_conflict (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return (tdb->format == 1 && (tdb->abort_code == 9 /* fetch conflict */ || tdb->abort_code == 10 /* store conflict */)); } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_is_failure_persistent (long const __result) { 
return __result == _HTM_TBEGIN_PERSISTENT; } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_failure_address (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return tdb->atia; } static __inline long __attribute__((__always_inline__, __nodebug__)) __TM_failure_code (void* const __tdb_ptr) { struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr; return tdb->abort_code; } #endif /* __s390__ */ #endif /* __HTMXLINTRIN_H */ /builtins/hvx_hexagon_protos.h//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// #ifndef _HVX_HEXAGON_PROTOS_H_ #define _HVX_HEXAGON_PROTOS_H_ 1 #ifdef __HVX__ #if __HVX_LENGTH__ == 128 #define __BUILTIN_VECTOR_WRAP(a) a ## _128B #else #define __BUILTIN_VECTOR_WRAP(a) a #endif #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Rd32=vextract(Vu32,Rs32) C Intrinsic Prototype: Word32 Q6_R_vextract_VR(HVX_Vector Vu, Word32 Rs) Instruction Type: LD Execution Slots: SLOT0 ========================================================================== */ #define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=hi(Vss32) C Intrinsic Prototype: HVX_Vector Q6_V_hi_W(HVX_VectorPair Vss) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ 
#define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=lo(Vss32) C Intrinsic Prototype: HVX_Vector Q6_V_lo_W(HVX_VectorPair Vss) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vsplat_R(Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=and(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_and_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=and(Qs4,!Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=not(Qs4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_not_Q(HVX_VectorPred Qs) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=or(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_or_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=or(Qs4,!Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_or_QQn(Qs,Qt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vsetq(Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq_R(Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=xor(Qs4,Qt4) C Intrinsic Prototype: HVX_VectorPred Q6_Q_xor_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) vmem(Rt32+#s4)=Vs32 C Intrinsic Prototype: void Q6_vmem_QnRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ 
#if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) vmem(Rt32+#s4):nt=Vs32 C Intrinsic Prototype: void Q6_vmem_QnRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) vmem(Rt32+#s4):nt=Vs32 C Intrinsic Prototype: void Q6_vmem_QRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) vmem(Rt32+#s4)=Vs32 C Intrinsic Prototype: void Q6_vmem_QRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) Instruction Type: CVI_VM_ST Execution Slots: SLOT0 ========================================================================== */ #define Q6_vmem_QRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vabsdiff(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vabsdiff_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vabsdiff(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vabsdiff_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vabs(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vabs(Vu32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh_sat(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vabs(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vabs(Vu32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw_sat(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vadd_VbVb(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.b+=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.b+=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution 
Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.h+=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.h+=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vd32.h=vadd(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vadd_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vadd(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vadd_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vadd_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vadd_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vdd32.w=vadd(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.w+=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.w+=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=valign(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define 
Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=valign(Vu32,Vv32,#u3) C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vand(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vand(Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QR(HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32|=vand(Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vand(Vu32,Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vand_VR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vand(Vu32,Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vandor_QVR(HVX_VectorPred Qx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasl(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasl(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector 
Q6_Vh_vasl_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasl(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vasl(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vaslacc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasl(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhR(HVX_Vector Vu, Word32 Rt) Instruction Type: 
CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.h,Vv32.h) C Intrinsic Prototype: 
HVX_Vector Q6_Vh_vasr_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasr(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vasr(Vu32.w,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasracc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector 
Q6_Vh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vasr(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=Vu32 C 
Intrinsic Prototype: HVX_Vector Q6_V_equals_V(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=Vuu32 C Intrinsic Prototype: HVX_VectorPair Q6_W_equals_W(HVX_VectorPair Vuu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_equals_W(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h):rnd C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA 
Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: 
SLOT0123 ========================================================================== */ #define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w):rnd C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vcl0(Vu32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcl0_Vuh(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vcl0(Vu32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vcl0_Vuw(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vcombine(Vu32,Vv32) C Intrinsic Prototype: HVX_VectorPair Q6_W_vcombine_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define 
Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=#0 C Intrinsic Prototype: HVX_Vector Q6_V_vzero() Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)() #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vdeal(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeal_Vb(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vdeale(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeale_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vdeal(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdeal_Vh(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vdd32=vdeal(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_W_vdeal_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vdelta(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vdelta_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vdmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpy_VubRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vdmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpyacc_VhVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vdd32.h=vdmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vdmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vdmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vdmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRh_sat(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRuh_sat(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRuh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhVh_sat(HVX_Vector Vx, HVX_Vector 
Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsad_WuhRuh(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsadacc_WuwWuhRuh(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== 
Assembly Syntax: Qx4&=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: 
HVX_VectorPred Q6_Q_vcmp_eq_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eq_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eq_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.eq(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VhVh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w=vinsert(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vw_vinsert_VwR(HVX_Vector Vx, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vlalign(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vlalign(Vu32,Vv32,#u3) C Intrinsic 
Prototype: HVX_Vector Q6_V_vlalign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vlsr(Vu32.uh,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vlsr_VuhR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vlsr(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vlsr_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vlsr(Vu32.uw,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vlsr_VuwR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vlsr(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vlsr_VwVw(HVX_Vector Vu, 
HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vlsr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbR(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8) 
C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhR(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmax(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmax_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vmax(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vmax_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vmax(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmax_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vd32.w=vmax(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmax_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmin(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmin_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vmin(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vub_vmin_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vmin(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmin_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmin(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector 
Q6_Vw_vmin_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub) C Intrinsic 
Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWub(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubWub(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpa(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpa(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vxx32.h+=vmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubRb(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vxx32.h+=vmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVbVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpye(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Rt32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhRh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh_sat(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyieo(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieo_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyie(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyie_VwVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVuh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyio(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyio_VwVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift C 
Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubRub(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubRub(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhRuh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhRuh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vmux(Qt4,Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vmux_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vnavg(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vnavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vnavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vnavg(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vnavg(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vnavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vnormamt(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vnormamt_Vh(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vnormamt(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vnormamt_Vw(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vnormamt_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vnot(Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vnot_V(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vnot_V(Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vor(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vor_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpacke(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacke_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpacke(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacke_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpack(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vpack(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vpacko(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacko_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpacko(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacko_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpack(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 
60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vpack(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vpopcount(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vpopcount_Vh(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vrdelta(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vrdelta_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubRb(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* 
========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Rt32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpy_WubRbI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpyacc_WwWubRbI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpy_VubVb(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubRub(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define 
Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpy_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpyacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction 
Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vror(Vu32,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vror_VR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vround(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vround(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: 
CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vround(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vround(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsad_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wuw_vrsadacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vsat(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vub_vsat_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsat(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsat_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsxt(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsxt_Vb(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsxt(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsxt_Vh(HVX_Vector 
Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuffe(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffe_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuff(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuff_Vb(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuffe(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffe_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuff(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuff_Vh(HVX_Vector Vu) Instruction Type: CVI_VP Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vshuffo(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffo_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vshuff(Vu32,Vv32,Rt8) C Intrinsic Prototype: HVX_VectorPair Q6_W_vshuff_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vshuffoe(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vshuffoe_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vshuffoe(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vshuffoe_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vshuffo(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffo_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vshuffo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.b-=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.b-=Vu32.b C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.h-=Vu32.h C Intrinsic Prototype: 
HVX_Vector Q6_Vh_condnac_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.h-=Vu32.h C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if 
__HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vsub(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_VubVub(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vsub_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsub_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vsub_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv) 
#endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (!Qv4) Vx32.w-=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: if (Qv4) Vx32.w-=Vu32.w C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32=vswap(Qt4,Vu32,Vv32) C Intrinsic Prototype: HVX_VectorPair Q6_W_vswap_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vtmpy(Vuu32.b,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WbRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.b,Rt32.b) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wh_vtmpyacc_WhWbRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vtmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vtmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vxx32.w+=vtmpy(Vuu32.h,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.h=vunpack(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpack_Vb(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.w=vunpack(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpack_Vh(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vxx32.h|=vunpacko(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpackoor_WhVb(HVX_VectorPair Vxx, HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: 
Vxx32.w|=vunpacko(Vu32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpackoor_WwVh(HVX_VectorPair Vxx, HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vunpack(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vunpack_Vub(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vunpack(Vu32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vunpack_Vuh(HVX_Vector Vu) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vd32=vxor(Vu32,Vv32) C Intrinsic Prototype: HVX_Vector Q6_V_vxor_VV(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uh=vzxt(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wuh_vzxt_Vub(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 /* ========================================================================== Assembly Syntax: Vdd32.uw=vzxt(Vu32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vzxt_Vuh(HVX_Vector Vu) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vzxt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vb_vsplat_R(Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.h=vsplat(Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vsplat_R(Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Qd4=vsetq2(Rt32) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq2_R(Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vsetq2_R(Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Qd4.b=vshuffe(Qs4.h,Qt4.h) C Intrinsic Prototype: HVX_VectorPred Q6_Qb_vshuffe_QhQh(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Qd4.h=vshuffe(Qs4.w,Qt4.w) C Intrinsic Prototype: HVX_VectorPred Q6_Qh_vshuffe_QwQw(HVX_VectorPred Qs, HVX_VectorPred Qt) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 
62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w,Qx4):carry C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.h=vadd(vclb(Vu32.h),Vv32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_vclb_VhVh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(vclb(Vu32.w),Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_vclb_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.w+=vadd(Vu32.h,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.h+=vadd(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vaddacc_WhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vadd(Vu32.ub,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.w+=vadd(Vu32.uh,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 
========================================================================== */ #define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uw=vadd(Vu32.uw,Vv32.uw):sat C Intrinsic Prototype: HVX_Vector Q6_Vuw_vadd_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.uw=vadd(Vuu32.uw,Vvv32.uw):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vadd_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32=vand(!Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnR(HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vx32|=vand(!Qu4,Rt32) C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQnR(HVX_Vector Vx, 
HVX_VectorPred Qu, Word32 Rt) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_V_vandor_VQnR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32=vand(!Qv4,Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnV(HVX_VectorPred Qv, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32=vand(Qv4,Vu32) C Intrinsic Prototype: HVX_Vector Q6_V_vand_QV(HVX_VectorPred Qv, HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* 
========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vlsr(Vu32.ub,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vub_vlsr_VubR(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8):nomatch C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,#u3) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbI(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,#u3) C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8):nomatch C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,#u3) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhI(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) 
Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,#u3) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vmax(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vmax_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vmin(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vmin_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.w=vmpa(Vuu32.uh,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WuhRb(HVX_VectorPair Vuu, Word32 
Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpa(Vuu32.uh,Rt32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWuhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32=vmpye(Vu32.w,Vv32.uh) C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRub(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRub(HVX_Vector Vx, 
HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vxx32+=vmpyo(Vu32.w,Vv32.h) C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpyoacc_WVwVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.ub=vround(Vu32.uh,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vround(Vu32.uw,Vv32.uw):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uh=vsat(Vu32.uw,Vv32.uw) C 
Intrinsic Prototype: HVX_Vector Q6_Vuh_vsat_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w,Qx4):carry C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly 
Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vd32.uw=vsub(Vu32.uw,Vv32.uw):sat C Intrinsic Prototype: HVX_Vector Q6_Vuw_vsub_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 /* ========================================================================== Assembly Syntax: Vdd32.uw=vsub(Vuu32.uw,Vvv32.uw):sat C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vsub_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) Instruction Type: CVI_VA_DV Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vabs(Vu32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: 
Vd32.b=vabs(Vu32.b):sat C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb_sat(HVX_Vector Vu) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h+=vasl(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vaslacc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h+=vasr(Vu32.h,Rt32) C Intrinsic Prototype: HVX_Vector Q6_Vh_vasracc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* 
========================================================================== Assembly Syntax: Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vavg(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vavg(Vu32.b,Vv32.b):rnd C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 
65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uw=vavg(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uw=vavg(Vu32.uw,Vv32.uw):rnd C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw_rnd(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vdd32=#0 C Intrinsic Prototype: HVX_VectorPair Q6_W_vzero() Instruction Type: MAPPING Execution Slots: SLOT0123 ========================================================================== */ #define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)() #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vtmp.h=vgather(Rt32,Mu2,Vv32.h).h C Intrinsic Prototype: void Q6_vgather_ARMVh(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* 
========================================================================== Assembly Syntax: if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vv32.h).h C Intrinsic Prototype: void Q6_vgather_AQRMVh(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h C Intrinsic Prototype: void Q6_vgather_ARMWw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv) Instruction Type: CVI_GATHER_DV Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h C Intrinsic Prototype: void Q6_vgather_AQRMWw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv) Instruction Type: CVI_GATHER_DV Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vtmp.w=vgather(Rt32,Mu2,Vv32.w).w C Intrinsic Prototype: void Q6_vgather_ARMVw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, 
HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w C Intrinsic Prototype: void Q6_vgather_AQRMVw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv) Instruction Type: CVI_GATHER Execution Slots: SLOT01 ========================================================================== */ #define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.h=vlut4(Vu32.uh,Rtt32.h) C Intrinsic Prototype: HVX_Vector Q6_Vh_vlut4_VuhPh(HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRub(HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly 
Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRub(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h=vmpa(Vx32.h,Vu32.h,Rtt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVhPh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h=vmpa(Vx32.h,Vu32.uh,Rtt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.h=vmps(Vx32.h,Vu32.uh,Rtt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmps_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) Instruction Type: CVI_VX_DV Execution Slots: SLOT2 ========================================================================== */ #define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.uw=vmpye(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpye_VuhRuh(HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vx32.uw+=vmpye(Vu32.uh,Rt32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpyeacc_VuwVuhRuh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=vnavg(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VbVb(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ 
#define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.b=prefixsum(Qv4) C Intrinsic Prototype: HVX_Vector Q6_Vb_prefixsum_Q(HVX_VectorPred Qv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.h=prefixsum(Qv4) C Intrinsic Prototype: HVX_Vector Q6_Vh_prefixsum_Q(HVX_VectorPred Qv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Vd32.w=prefixsum(Qv4) C Intrinsic Prototype: HVX_Vector Q6_Vw_prefixsum_Q(HVX_VectorPred Qv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.h).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 
========================================================================== */ #define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.h).h+=Vw32 C Intrinsic Prototype: void Q6_vscatteracc_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vv32.h).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_QRMVhV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vvv32.w).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) Instruction Type: CVI_SCATTER_DV Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* 
========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32 C Intrinsic Prototype: void Q6_vscatteracc_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) Instruction Type: CVI_SCATTER_DV Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32 C Intrinsic Prototype: void Q6_vscatter_QRMWwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) Instruction Type: CVI_SCATTER_DV Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.w).w=Vw32 C Intrinsic Prototype: void Q6_vscatter_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: vscatter(Rt32,Mu2,Vv32.w).w+=Vw32 C Intrinsic Prototype: void Q6_vscatteracc_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 
========================================================================== */ #define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32 C Intrinsic Prototype: void Q6_vscatter_QRMVwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) Instruction Type: CVI_SCATTER Execution Slots: SLOT0 ========================================================================== */ #define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry_sat(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred Qs) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1)) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vxx32.w=vasrinto(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vasrinto_WwVwVw(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VP_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if 
__HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vd32.uw=vrotr(Vu32.uw,Vv32.uw) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrotr_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 /* ========================================================================== Assembly Syntax: Vd32.w=vsatdw(Vu32.w,Vv32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vsatdw_VwVw(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_h(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_h(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_v(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_v(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vabs(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vabs(Vu32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 
========================================================================== */ #define Q6_Vsf_vabs_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 
========================================================================== */ #define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vadd(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vadd(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV 
Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vadd(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.w=vfmv(Vu32.w) C Intrinsic Prototype: HVX_Vector Q6_Vw_vfmv_Vw(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=Vu32.qf16 C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vqf16(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=Vuu32.qf32 C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Wqf32(HVX_VectorPair Vuu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define 
Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=Vu32.qf32 C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vqf32(HVX_Vector Vu) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.b=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.h=vcvt(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt(Vu32.b) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if 
__HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.h) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.hf=vcvt(Vu32.ub) C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vcvt(Vu32.uh) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vcvt(Vu32.hf) 
C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.ub=vcvt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.uh=vcvt(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vdmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, 
HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vfmax(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vfmax(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vfmin(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vfmin(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector 
Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vfneg(Vu32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vfneg(Vu32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred 
Q6_Q_vcmp_gtand_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qd4=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) 
Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4&=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4|=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Qx4^=vcmp.gt(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ 
#define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vmax(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vmax(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vmin(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vmin(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution 
Slots: SLOT0123 ========================================================================== */ #define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vx32.hf+=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) 
Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair 
Q6_Wqf32_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vmpy(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vmpy(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf) C 
Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vmpy(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.hf=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: 
Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.hf) C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* 
========================================================================== Assembly Syntax: Vd32.qf32=vsub(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vdd32.sf=vsub(Vu32.hf,Vv32.hf) C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 ========================================================================== */ #define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 /* ========================================================================== Assembly Syntax: Vd32.sf=vsub(Vu32.sf,Vv32.sf) C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if 
__HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_sat(HVX_VectorPair Vuu, HVX_Vector Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 ========================================================================== */ #define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #if __HVX_ARCH__ >= 69 /* ========================================================================== Assembly Syntax: Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16 C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmpy_VuhVuh_rs16(HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ #define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ #endif /* __HVX__ */ #endif /* ===-------- ia32intrin.h ---------------------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __IA32INTRIN_H #define __IA32INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr #else #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif /** Find the first set bit starting from the lsb. Result is undefined if * input is 0. * * \headerfile * * This intrinsic corresponds to the BSF instruction or the * TZCNT instruction. * * \param __A * A 32-bit integer operand. * \returns A 32-bit integer containing the bit number. */ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfd(int __A) { return __builtin_ctz((unsigned int)__A); } /** Find the first set bit starting from the msb. Result is undefined if * input is 0. * * \headerfile * * This intrinsic corresponds to the BSR instruction or the * LZCNT instruction and an XOR . * * \param __A * A 32-bit integer operand. * \returns A 32-bit integer containing the bit number. 
*/ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrd(int __A) { return 31 - __builtin_clz((unsigned int)__A); } /** Swaps the bytes in the input. Converting little endian to big endian or * vice versa. * * \headerfile * * This intrinsic corresponds to the BSWAP instruction. * * \param __A * A 32-bit integer operand. * \returns A 32-bit integer containing the swapped bytes. */ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bswapd(int __A) { return (int)__builtin_bswap32((unsigned int)__A); } static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _bswap(int __A) { return (int)__builtin_bswap32((unsigned int)__A); } #define _bit_scan_forward(A) __bsfd((A)) #define _bit_scan_reverse(A) __bsrd((A)) #ifdef __x86_64__ /** Find the first set bit starting from the lsb. Result is undefined if * input is 0. * * \headerfile * * This intrinsic corresponds to the BSF instruction or the * TZCNT instruction. * * \param __A * A 64-bit integer operand. * \returns A 32-bit integer containing the bit number. */ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfq(long long __A) { return (long long)__builtin_ctzll((unsigned long long)__A); } /** Find the first set bit starting from the msb. Result is undefined if * input is 0. * * \headerfile * * This intrinsic corresponds to the BSR instruction or the * LZCNT instruction and an XOR . * * \param __A * A 64-bit integer operand. * \returns A 32-bit integer containing the bit number. */ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrq(long long __A) { return 63 - __builtin_clzll((unsigned long long)__A); } /** Swaps the bytes in the input. Converting little endian to big endian or * vice versa. * * \headerfile * * This intrinsic corresponds to the BSWAP instruction. * * \param __A * A 64-bit integer operand. * \returns A 64-bit integer containing the swapped bytes. 
*/ static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __bswapq(long long __A) { return (long long)__builtin_bswap64((unsigned long long)__A); } #define _bswap64(A) __bswapq((A)) #endif /** Counts the number of bits in the source operand having a value of 1. * * \headerfile * * This intrinsic corresponds to the POPCNT instruction or a * a sequence of arithmetic and logic ops to calculate it. * * \param __A * An unsigned 32-bit integer operand. * \returns A 32-bit integer containing the number of bits with value 1 in the * source operand. */ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __popcntd(unsigned int __A) { return __builtin_popcount(__A); } #define _popcnt32(A) __popcntd((A)) #ifdef __x86_64__ /** Counts the number of bits in the source operand having a value of 1. * * \headerfile * * This intrinsic corresponds to the POPCNT instruction or a * a sequence of arithmetic and logic ops to calculate it. * * \param __A * An unsigned 64-bit integer operand. * \returns A 64-bit integer containing the number of bits with value 1 in the * source operand. 
*/ static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __popcntq(unsigned long long __A) { return __builtin_popcountll(__A); } #define _popcnt64(A) __popcntq((A)) #endif /* __x86_64__ */ #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u64(); } static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned long long __f) { __builtin_ia32_writeeflags_u64(__f); } #else /* !__x86_64__ */ static __inline__ unsigned int __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u32(); } static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned int __f) { __builtin_ia32_writeeflags_u32(__f); } #endif /* !__x86_64__ */ /** Cast a 32-bit float value to a 32-bit unsigned integer value * * \headerfile * This intrinsic corresponds to the VMOVD / MOVD instruction in x86_64, * and corresponds to the VMOVL / MOVL instruction in ia32. * * \param __A * A 32-bit float value. * \returns a 32-bit unsigned integer containing the converted value. */ static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST _castf32_u32(float __A) { return __builtin_bit_cast(unsigned int, __A); } /** Cast a 64-bit float value to a 64-bit unsigned integer value * * \headerfile * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, * and corresponds to the VMOVL / MOVL instruction in ia32. * * \param __A * A 64-bit float value. * \returns a 64-bit unsigned integer containing the converted value. */ static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST _castf64_u64(double __A) { return __builtin_bit_cast(unsigned long long, __A); } /** Cast a 32-bit unsigned integer value to a 32-bit float value * * \headerfile * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, * and corresponds to the FLDS instruction in ia32. * * \param __A * A 32-bit unsigned integer value. * \returns a 32-bit float value containing the converted value. 
*/ static __inline__ float __DEFAULT_FN_ATTRS_CAST _castu32_f32(unsigned int __A) { return __builtin_bit_cast(float, __A); } /** Cast a 64-bit unsigned integer value to a 64-bit float value * * \headerfile * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, * and corresponds to the FLDL instruction in ia32. * * \param __A * A 64-bit unsigned integer value. * \returns a 64-bit float value containing the converted value. */ static __inline__ double __DEFAULT_FN_ATTRS_CAST _castu64_f64(unsigned long long __A) { return __builtin_bit_cast(double, __A); } /** Adds the unsigned integer operand to the CRC-32C checksum of the * unsigned char operand. * * \headerfile * * This intrinsic corresponds to the CRC32B instruction. * * \param __C * An unsigned integer operand to add to the CRC-32C checksum of operand * \a __D. * \param __D * An unsigned 8-bit integer operand used to compute the CRC-32C checksum. * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); } /** Adds the unsigned integer operand to the CRC-32C checksum of the * unsigned short operand. * * \headerfile * * This intrinsic corresponds to the CRC32W instruction. * * \param __C * An unsigned integer operand to add to the CRC-32C checksum of operand * \a __D. * \param __D * An unsigned 16-bit integer operand used to compute the CRC-32C checksum. * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); } /** Adds the unsigned integer operand to the CRC-32C checksum of the * second unsigned integer operand. * * \headerfile * * This intrinsic corresponds to the CRC32D instruction. 
* * \param __C * An unsigned integer operand to add to the CRC-32C checksum of operand * \a __D. * \param __D * An unsigned 32-bit integer operand used to compute the CRC-32C checksum. * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32 __crc32d(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); } #ifdef __x86_64__ /** Adds the unsigned integer operand to the CRC-32C checksum of the * unsigned 64-bit integer operand. * * \headerfile * * This intrinsic corresponds to the CRC32Q instruction. * * \param __C * An unsigned integer operand to add to the CRC-32C checksum of operand * \a __D. * \param __D * An unsigned 64-bit integer operand used to compute the CRC-32C checksum. * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32 __crc32q(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); } #endif /* __x86_64__ */ static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdpmc(int __A) { return __builtin_ia32_rdpmc(__A); } /* __rdtscp */ static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdtscp(unsigned int *__A) { return __builtin_ia32_rdtscp(__A); } #define _rdtsc() __rdtsc() #define _rdpmc(A) __rdpmc(A) static __inline__ void __DEFAULT_FN_ATTRS _wbinvd(void) { __builtin_ia32_wbinvd(); } static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rolb(unsigned char __X, int __C) { return __builtin_rotateleft8(__X, __C); } static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rorb(unsigned char __X, int __C) { return __builtin_rotateright8(__X, __C); } static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rolw(unsigned short __X, int __C) { return __builtin_rotateleft16(__X, __C); } static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rorw(unsigned short __X, int 
__C) { return __builtin_rotateright16(__X, __C); } static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rold(unsigned int __X, int __C) { return __builtin_rotateleft32(__X, (unsigned int)__C); } static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rord(unsigned int __X, int __C) { return __builtin_rotateright32(__X, (unsigned int)__C); } #ifdef __x86_64__ static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rolq(unsigned long long __X, int __C) { return __builtin_rotateleft64(__X, (unsigned long long)__C); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rorq(unsigned long long __X, int __C) { return __builtin_rotateright64(__X, (unsigned long long)__C); } #endif /* __x86_64__ */ #ifndef _MSC_VER /* These are already provided as builtins for MSVC. */ /* Select the correct function based on the size of long. */ #ifdef __LP64__ #define _lrotl(a,b) __rolq((a), (b)) #define _lrotr(a,b) __rorq((a), (b)) #else #define _lrotl(a,b) __rold((a), (b)) #define _lrotr(a,b) __rord((a), (b)) #endif #define _rotl(a,b) __rold((a), (b)) #define _rotr(a,b) __rord((a), (b)) #endif // _MSC_VER /* These are not builtins so need to be provided in all modes. */ #define _rotwl(a,b) __rolw((a), (b)) #define _rotwr(a,b) __rorw((a), (b)) #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CAST #undef __DEFAULT_FN_ATTRS_CRC32 #undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __IA32INTRIN_H */ /*===---- immintrin.h - Intel intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MMX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSSE3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__SSE4_2__) || defined(__SSE4_1__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AES__) || defined(__PCLMUL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLFLUSHOPT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLWB__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__F16C__) #include #endif /* No feature check desired due to internal checks */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__BMI2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__LZCNT__) #include #endif #if 
!(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__POPCNT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512F__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VL__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BITALG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512CD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VPOPCNTDQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VNNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512DQ__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BITALG__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BW__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512CD__)) #include #endif #if !(defined(_MSC_VER) || 
defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512DQ__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512ER__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512IFMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512IFMA__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXIFMA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VBMI2__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512PF__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512FP16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512BF16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BF16__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PKU__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__VPCLMULQDQ__) #include #endif #if 
!(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__VAES__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__GFNI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNIINT8__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXNECONVERT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHA512__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SM3__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SM4__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVXVNNIINT16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile /// /// This intrinsic corresponds to the RDPID instruction. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) _rdpid_u32(void) { return __builtin_ia32_rdpid(); } #endif // __RDPID__ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDRND__) /// Returns a 16-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 16-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) { return (int)__builtin_ia32_rdrand16_step(__p); } /// Returns a 32-bit hardware-generated random value. 
/// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 32-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand32_step(unsigned int *__p) { return (int)__builtin_ia32_rdrand32_step(__p); } /// Returns a 64-bit hardware-generated random value. /// /// \headerfile /// /// This intrinsic corresponds to the RDRAND instruction. /// /// \param __p /// A pointer to a 64-bit memory location to place the random value. /// \returns 1 if the value was successfully generated, 0 otherwise. static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand64_step(unsigned long long *__p) { #ifdef __x86_64__ return (int)__builtin_ia32_rdrand64_step(__p); #else // We need to emulate the functionality of 64-bit rdrand with 2 32-bit // rdrand instructions. unsigned int __lo, __hi; unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo); unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi); if (__res_lo && __res_hi) { *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo; return 1; } else { *__p = 0; return 0; } #endif } #endif /* __RDRND__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FSGSBASE__) #ifdef __x86_64__ /// Reads the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDFSBASE instruction. /// /// \returns The lower 32 bits of the FS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u32(void) { return __builtin_ia32_rdfsbase32(); } /// Reads the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDFSBASE instruction. /// /// \returns The contents of the FS base register. 
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u64(void) { return __builtin_ia32_rdfsbase64(); } /// Reads the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDGSBASE instruction. /// /// \returns The lower 32 bits of the GS base register. static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u32(void) { return __builtin_ia32_rdgsbase32(); } /// Reads the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the RDGSBASE instruction. /// /// \returns The contents of the GS base register. static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readgsbase_u64(void) { return __builtin_ia32_rdgsbase64(); } /// Modifies the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for the lower 32 bits of the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u32(unsigned int __V) { __builtin_ia32_wrfsbase32(__V); } /// Modifies the FS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for the FS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u64(unsigned long long __V) { __builtin_ia32_wrfsbase64(__V); } /// Modifies the GS base register. /// /// \headerfile /// /// This intrinsic corresponds to the WRGSBASE instruction. /// /// \param __V /// Value to use for the lower 32 bits of the GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u32(unsigned int __V) { __builtin_ia32_wrgsbase32(__V); } /// Modifies the GS base register. 
/// /// \headerfile /// /// This intrinsic corresponds to the WRFSBASE instruction. /// /// \param __V /// Value to use for GS base register. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u64(unsigned long long __V) { __builtin_ia32_wrgsbase64(__V); } #endif #endif /* __FSGSBASE__ */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MOVBE__) /* The structs used below are to force the load/store to be unaligned. This * is accomplished with the __packed__ attribute. The __may_alias__ prevents * tbaa metadata from being generated based on the struct and the type of the * field inside of it. */ static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i16(void const * __P) { struct __loadu_i16 { unsigned short __v; } __attribute__((__packed__, __may_alias__)); return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i16(void * __P, short __D) { struct __storeu_i16 { unsigned short __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D); } static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i32(void const * __P) { struct __loadu_i32 { unsigned int __v; } __attribute__((__packed__, __may_alias__)); return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i32(void * __P, int __D) { struct __storeu_i32 { unsigned int __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D); } #ifdef __x86_64__ static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _loadbe_i64(void const * __P) { struct 
__loadu_i64 { unsigned long long __v; } __attribute__((__packed__, __may_alias__)); return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe"))) _storebe_i64(void * __P, long long __D) { struct __storeu_i64 { unsigned long long __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); } #endif #endif /* __MOVBE */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RTM__) #include #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHA__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FXSR__) #include #endif /* No feature check desired due to internal MSC_VER checks */ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVEOPT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVEC__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XSAVES__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SHSTK__) #include #endif /* Some intrinsics inside adxintrin.h are available only on processors with ADX, * whereas others are also available at all times. 
*/ #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDSEED__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WBNOINVD__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLDEMOTE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WAITPKG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MOVDIRI__) || defined(__MOVDIR64B__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PCONFIG__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SGX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PTWRITE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__INVPCID__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_FP16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) || defined(__WIDEKL__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMX_COMPLEX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512VP2INTERSECT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__ENQCMD__) #include #endif #if !(defined(_MSC_VER) || 
defined(__SCE__)) || __has_feature(modules) || \ defined(__SERIALIZE__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__TSXLDTRK__) #include #endif #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #ifdef __cplusplus extern "C" { #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange HLE \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } #endif #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" : "+r" (_Value), "+m" (*_Target) :: "memory"); return _Value; } #endif /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange HLE \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ long __DEFAULT_FN_ATTRS 
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, long _Exchange, long _Comparand) { __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } static __inline__ long __DEFAULT_FN_ATTRS _InterlockedCompareExchange_HLERelease(long volatile *_Destination, long _Exchange, long _Comparand) { __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } #endif #if defined(__x86_64__) static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } static __inline__ __int64 __DEFAULT_FN_ATTRS _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand) { __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" : "+a" (_Comparand), "+m" (*_Destination) : "r" (_Exchange) : "memory"); return _Comparand; } #endif #ifdef __cplusplus } #endif #undef __DEFAULT_FN_ATTRS #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ #endif /* __IMMINTRIN_H */ /* ===-------- intrin.h ---------------------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we're compiling for the windows platform. */ #ifndef _MSC_VER #include_next #else #ifndef __INTRIN_H #define __INTRIN_H /* First include the standard intrinsics. 
*/ #if defined(__i386__) || defined(__x86_64__) #include #endif #if defined(__arm__) #include #endif #if defined(__aarch64__) #include #endif /* For the definition of jmp_buf. */ #if __STDC_HOSTED__ #include #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #if __x86_64__ #define __LPTRINT_TYPE__ __int64 #else #define __LPTRINT_TYPE__ long #endif #ifdef __cplusplus extern "C" { #endif #if defined(__MMX__) /* And the random ones that aren't in those files. */ __m64 _m_from_float(float); float _m_to_float(__m64); #endif /* Other assorted instruction intrinsics. */ void __addfsbyte(unsigned long, unsigned char); void __addfsdword(unsigned long, unsigned long); void __addfsword(unsigned long, unsigned short); void __code_seg(const char *); void __cpuid(int[4], int); void __cpuidex(int[4], int, int); __int64 __emul(int, int); unsigned __int64 __emulu(unsigned int, unsigned int); unsigned int __getcallerseflags(void); void __halt(void); unsigned char __inbyte(unsigned short); void __inbytestring(unsigned short, unsigned char *, unsigned long); void __incfsbyte(unsigned long); void __incfsdword(unsigned long); void __incfsword(unsigned long); unsigned long __indword(unsigned short); void __indwordstring(unsigned short, unsigned long *, unsigned long); void __int2c(void); void __invlpg(void *); unsigned short __inword(unsigned short); void __inwordstring(unsigned short, unsigned short *, unsigned long); void __lidt(void *); unsigned __int64 __ll_lshift(unsigned __int64, int); __int64 __ll_rshift(__int64, int); void __movsb(unsigned char *, unsigned char const *, size_t); void __movsd(unsigned long *, unsigned long const *, size_t); void __movsw(unsigned short *, unsigned short const *, size_t); void __nop(void); void __nvreg_restore_fence(void); void __nvreg_save_fence(void); void __outbyte(unsigned short, unsigned char); void __outbytestring(unsigned short, unsigned char *, 
unsigned long); void __outdword(unsigned short, unsigned long); void __outdwordstring(unsigned short, unsigned long *, unsigned long); void __outword(unsigned short, unsigned short); void __outwordstring(unsigned short, unsigned short *, unsigned long); unsigned long __readcr0(void); unsigned long __readcr2(void); unsigned __LPTRINT_TYPE__ __readcr3(void); unsigned long __readcr4(void); unsigned long __readcr8(void); unsigned int __readdr(unsigned int); #ifdef __i386__ unsigned char __readfsbyte(unsigned long); unsigned short __readfsword(unsigned long); unsigned long __readfsdword(unsigned long); unsigned __int64 __readfsqword(unsigned long); #endif unsigned __int64 __readmsr(unsigned long); unsigned __int64 __readpmc(unsigned long); unsigned long __segmentlimit(unsigned long); void __sidt(void *); void __stosb(unsigned char *, unsigned char, size_t); void __stosd(unsigned long *, unsigned long, size_t); void __stosw(unsigned short *, unsigned short, size_t); void __svm_clgi(void); void __svm_invlpga(void *, int); void __svm_skinit(int); void __svm_stgi(void); void __svm_vmload(size_t); void __svm_vmrun(size_t); void __svm_vmsave(size_t); void __ud2(void); unsigned __int64 __ull_rshift(unsigned __int64, int); void __vmx_off(void); void __vmx_vmptrst(unsigned __int64 *); void __wbinvd(void); void __writecr0(unsigned int); void __writecr3(unsigned __INTPTR_TYPE__); void __writecr4(unsigned int); void __writecr8(unsigned int); void __writedr(unsigned int, unsigned int); void __writefsbyte(unsigned long, unsigned char); void __writefsdword(unsigned long, unsigned long); void __writefsqword(unsigned long, unsigned __int64); void __writefsword(unsigned long, unsigned short); void __writemsr(unsigned long, unsigned __int64); void *_AddressOfReturnAddress(void); unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); unsigned char _bittest(long const *, long); unsigned char 
_bittestandcomplement(long *, long); unsigned char _bittestandreset(long *, long); unsigned char _bittestandset(long *, long); void __cdecl _disable(void); void __cdecl _enable(void); long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); unsigned char _interlockedbittestandreset(long volatile *, long); unsigned char _interlockedbittestandset(long volatile *, long); void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *); void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, void *); long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); long _InterlockedExchangeAdd_HLERelease(long volatile *, long); __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); __int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); void _ReadBarrier(void); void _ReadWriteBarrier(void); unsigned int _rorx_u32(unsigned int, const unsigned int); int _sarx_i32(int, unsigned int); #if __STDC_HOSTED__ int __cdecl _setjmp(jmp_buf); #endif unsigned int _shlx_u32(unsigned int, unsigned int); unsigned int _shrx_u32(unsigned int, unsigned int); void _Store_HLERelease(long volatile *, long); void _Store64_HLERelease(__int64 volatile *, __int64); void _StorePointer_HLERelease(void *volatile *, void *); void _WriteBarrier(void); unsigned __int32 xbegin(void); void _xend(void); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ #ifdef __x86_64__ void __addgsbyte(unsigned long, unsigned char); void __addgsdword(unsigned long, unsigned long); void __addgsqword(unsigned long, unsigned __int64); void __addgsword(unsigned long, unsigned short); void __faststorefence(void); void __incgsbyte(unsigned long); void __incgsdword(unsigned long); void __incgsqword(unsigned long); void __incgsword(unsigned long); void __movsq(unsigned long long *, unsigned long long const *, size_t); unsigned char __readgsbyte(unsigned long); unsigned long __readgsdword(unsigned long); unsigned __int64 __readgsqword(unsigned long); unsigned short __readgsword(unsigned long); unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift); unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift); void __stosq(unsigned __int64 *, unsigned __int64, size_t); unsigned char __vmx_on(unsigned __int64 *); unsigned char __vmx_vmclear(unsigned __int64 *); unsigned char __vmx_vmlaunch(void); unsigned char __vmx_vmptrld(unsigned __int64 *); unsigned char __vmx_vmread(size_t, size_t *); unsigned char __vmx_vmresume(void); unsigned char __vmx_vmwrite(size_t, size_t); void __writegsbyte(unsigned long, unsigned char); void __writegsdword(unsigned long, unsigned long); void __writegsqword(unsigned long, unsigned __int64); void __writegsword(unsigned long, unsigned short); unsigned char _bittest64(__int64 const *, __int64); unsigned char _bittestandcomplement64(__int64 *, __int64); unsigned char _bittestandreset64(__int64 *, __int64); unsigned char _bittestandset64(__int64 *, __int64); long _InterlockedAnd_np(long volatile *_Value, long _Mask); short _InterlockedAnd16_np(short volatile *_Value, short _Mask); __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedAnd8_np(char volatile *_Value, char _Mask); unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); unsigned char 
_interlockedbittestandset64(__int64 volatile *, __int64); long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, long _Comparand); unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); short _InterlockedCompareExchange16_np(short volatile *_Destination, short _Exchange, short _Comparand); __int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, void *_Exchange, void *_Comparand); long _InterlockedOr_np(long volatile *_Value, long _Mask); short _InterlockedOr16_np(short volatile *_Value, short _Mask); __int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedOr8_np(char volatile *_Value, char _Mask); long _InterlockedXor_np(long volatile *_Value, long _Mask); short _InterlockedXor16_np(short volatile *_Value, short _Mask); __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); char _InterlockedXor8_np(char volatile *_Value, char _Mask); unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); __int64 _sarx_i64(__int64, unsigned int); unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); __int64 __mulh(__int64, __int64); unsigned __int64 __umulh(unsigned __int64, unsigned __int64); __int64 _mul128(__int64, __int64, __int64*); unsigned __int64 _umul128(unsigned __int64, unsigned __int64, unsigned __int64*); #endif /* __x86_64__ */ #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); #endif #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) __int64 _InterlockedDecrement64(__int64 volatile 
*_Addend); __int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value); __int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value); __int64 _InterlockedIncrement64(__int64 volatile *_Addend); __int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange Add \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value); char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value); char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value); short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value); short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value); short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value); long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value); long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value); long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value); __int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value); __int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value); __int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Increment \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) short _InterlockedIncrement16_acq(short volatile *_Value); short _InterlockedIncrement16_nf(short volatile 
*_Value); short _InterlockedIncrement16_rel(short volatile *_Value); long _InterlockedIncrement_acq(long volatile *_Value); long _InterlockedIncrement_nf(long volatile *_Value); long _InterlockedIncrement_rel(long volatile *_Value); __int64 _InterlockedIncrement64_acq(__int64 volatile *_Value); __int64 _InterlockedIncrement64_nf(__int64 volatile *_Value); __int64 _InterlockedIncrement64_rel(__int64 volatile *_Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Decrement \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) short _InterlockedDecrement16_acq(short volatile *_Value); short _InterlockedDecrement16_nf(short volatile *_Value); short _InterlockedDecrement16_rel(short volatile *_Value); long _InterlockedDecrement_acq(long volatile *_Value); long _InterlockedDecrement_nf(long volatile *_Value); long _InterlockedDecrement_rel(long volatile *_Value); __int64 _InterlockedDecrement64_acq(__int64 volatile *_Value); __int64 _InterlockedDecrement64_nf(__int64 volatile *_Value); __int64 _InterlockedDecrement64_rel(__int64 volatile *_Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked And \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedAnd8_acq(char volatile *_Value, char _Mask); char _InterlockedAnd8_nf(char volatile *_Value, char _Mask); char _InterlockedAnd8_rel(char volatile *_Value, char _Mask); short _InterlockedAnd16_acq(short volatile *_Value, short _Mask); short _InterlockedAnd16_nf(short volatile *_Value, short _Mask); short _InterlockedAnd16_rel(short volatile *_Value, short _Mask); long _InterlockedAnd_acq(long volatile *_Value, long _Mask); long _InterlockedAnd_nf(long volatile *_Value, long _Mask); long _InterlockedAnd_rel(long volatile *_Value, long _Mask); __int64 
_InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Bit Counting and Testing \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) unsigned char _interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase, long _BitPos); unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase, long _BitPos); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Or \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedOr8_acq(char volatile *_Value, char _Mask); char _InterlockedOr8_nf(char volatile *_Value, char _Mask); char _InterlockedOr8_rel(char volatile *_Value, char _Mask); short _InterlockedOr16_acq(short volatile *_Value, short _Mask); short _InterlockedOr16_nf(short volatile *_Value, short _Mask); short _InterlockedOr16_rel(short volatile *_Value, short _Mask); long _InterlockedOr_acq(long volatile *_Value, long _Mask); long _InterlockedOr_nf(long volatile *_Value, long _Mask); long _InterlockedOr_rel(long volatile *_Value, long _Mask); __int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask); #endif 
/*----------------------------------------------------------------------------*\ |* Interlocked Xor \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedXor8_acq(char volatile *_Value, char _Mask); char _InterlockedXor8_nf(char volatile *_Value, char _Mask); char _InterlockedXor8_rel(char volatile *_Value, char _Mask); short _InterlockedXor16_acq(short volatile *_Value, short _Mask); short _InterlockedXor16_nf(short volatile *_Value, short _Mask); short _InterlockedXor16_rel(short volatile *_Value, short _Mask); long _InterlockedXor_acq(long volatile *_Value, long _Mask); long _InterlockedXor_nf(long volatile *_Value, long _Mask); long _InterlockedXor_rel(long volatile *_Value, long _Mask); __int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask); __int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Exchange \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedExchange8_acq(char volatile *_Target, char _Value); char _InterlockedExchange8_nf(char volatile *_Target, char _Value); char _InterlockedExchange8_rel(char volatile *_Target, char _Value); short _InterlockedExchange16_acq(short volatile *_Target, short _Value); short _InterlockedExchange16_nf(short volatile *_Target, short _Value); short _InterlockedExchange16_rel(short volatile *_Target, short _Value); long _InterlockedExchange_acq(long volatile *_Target, long _Value); long _InterlockedExchange_nf(long volatile *_Target, long _Value); long _InterlockedExchange_rel(long volatile *_Target, long _Value); __int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchange64_nf(__int64 volatile 
*_Target, __int64 _Value); __int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value); #endif /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange \*----------------------------------------------------------------------------*/ #if defined(__arm__) || defined(__aarch64__) char _InterlockedCompareExchange8_acq(char volatile *_Destination, char _Exchange, char _Comparand); char _InterlockedCompareExchange8_nf(char volatile *_Destination, char _Exchange, char _Comparand); char _InterlockedCompareExchange8_rel(char volatile *_Destination, char _Exchange, char _Comparand); short _InterlockedCompareExchange16_acq(short volatile *_Destination, short _Exchange, short _Comparand); short _InterlockedCompareExchange16_nf(short volatile *_Destination, short _Exchange, short _Comparand); short _InterlockedCompareExchange16_rel(short volatile *_Destination, short _Exchange, short _Comparand); long _InterlockedCompareExchange_acq(long volatile *_Destination, long _Exchange, long _Comparand); long _InterlockedCompareExchange_nf(long volatile *_Destination, long _Exchange, long _Comparand); long _InterlockedCompareExchange_rel(long volatile *_Destination, long _Exchange, long _Comparand); __int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); __int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); __int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); #endif #if defined(__x86_64__) || defined(__aarch64__) unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); #endif #if defined(__aarch64__) unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 
*_ComparandResult); unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 *_ComparandResult); #endif /*----------------------------------------------------------------------------*\ |* movs, stos \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { #if defined(__x86_64__) __asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); #else __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n" "rep movsb\n" "xchg {%%esi, %1|%1, esi}" : "+D"(__dst), "+r"(__src), "+c"(__n) : : "memory"); #endif } static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { #if defined(__x86_64__) __asm__ __volatile__("rep movs{l|d}" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); #else __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n" "rep movs{l|d}\n" "xchg {%%esi, %1|%1, esi}" : "+D"(__dst), "+r"(__src), "+c"(__n) : : "memory"); #endif } static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { #if defined(__x86_64__) __asm__ __volatile__("rep movsw" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); #else __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n" "rep movsw\n" "xchg {%%esi, %1|%1, esi}" : "+D"(__dst), "+r"(__src), "+c"(__n) : : "memory"); #endif } static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, unsigned long __x, size_t __n) { __asm__ __volatile__("rep stos{l|d}" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst, unsigned short __x, 
size_t __n) { __asm__ __volatile__("rep stosw" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } #endif #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS __movsq( unsigned long long *__dst, unsigned long long const *__src, size_t __n) { __asm__ __volatile__("rep movsq" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); } static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { __asm__ __volatile__("rep stosq" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } #endif /*----------------------------------------------------------------------------*\ |* Misc \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile("hlt"); } #endif #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { __asm__ volatile("nop"); } #endif /*----------------------------------------------------------------------------*\ |* MS AArch64 specific \*----------------------------------------------------------------------------*/ #if defined(__aarch64__) unsigned __int64 __getReg(int); long _InterlockedAdd(long volatile *Addend, long Value); __int64 _ReadStatusReg(int); void _WriteStatusReg(int, __int64); unsigned short __cdecl _byteswap_ushort(unsigned short val); unsigned long __cdecl _byteswap_ulong (unsigned long val); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val); __int64 __mulh(__int64 __a, __int64 __b); unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b); void __break(int); void __writex18byte(unsigned long offset, unsigned char data); void __writex18word(unsigned long offset, unsigned short data); void __writex18dword(unsigned long offset, unsigned long data); void __writex18qword(unsigned long offset, unsigned __int64 data); unsigned char __readx18byte(unsigned long offset); 
unsigned short __readx18word(unsigned long offset); unsigned long __readx18dword(unsigned long offset); unsigned __int64 __readx18qword(unsigned long offset); #endif /*----------------------------------------------------------------------------*\ |* Privileged intrinsics \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __readmsr(unsigned long __register) { // Loads the contents of a 64-bit model specific register (MSR) specified in // the ECX register into registers EDX:EAX. The EDX register is loaded with // the high-order 32 bits of the MSR and the EAX register is loaded with the // low-order 32 bits. If less than 64 bits are implemented in the MSR being // read, the values returned to EDX:EAX in unimplemented bit locations are // undefined. unsigned long __edx; unsigned long __eax; __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register)); return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax; } #endif static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) { unsigned __LPTRINT_TYPE__ __cr3_val; __asm__ __volatile__( "mov {%%cr3, %0|%0, cr3}" : "=r"(__cr3_val) : : "memory"); return __cr3_val; } static __inline__ void __DEFAULT_FN_ATTRS __writecr3(unsigned __INTPTR_TYPE__ __cr3_val) { __asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory"); } #ifdef __cplusplus } #endif #undef __LPTRINT_TYPE__ #undef __DEFAULT_FN_ATTRS #endif /* __INTRIN_H */ #endif /* _MSC_VER */ /*===---- inttypes.h - Standard header for integer printf macros ----------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #if !defined(__CLANG_INTTYPES_H) || !defined(_INTTYPES_H) // AIX system headers need inttypes.h to be re-enterable while _STD_TYPES_T // is defined until an inclusion of it without _STD_TYPES_T occurs, in which // case the header guard macro is defined. #if !defined(_AIX) || !defined(_STD_TYPES_T) #define __CLANG_INTTYPES_H #endif #if defined(_MSC_VER) && _MSC_VER < 1800 #error MSVC does not have inttypes.h prior to Visual Studio 2013 #endif #include_next #if defined(_MSC_VER) && _MSC_VER < 1900 /* MSVC headers define int32_t as int, but PRIx32 as "lx" instead of "x". * This triggers format warnings, so fix it up here. */ #undef PRId32 #undef PRIdLEAST32 #undef PRIdFAST32 #undef PRIi32 #undef PRIiLEAST32 #undef PRIiFAST32 #undef PRIo32 #undef PRIoLEAST32 #undef PRIoFAST32 #undef PRIu32 #undef PRIuLEAST32 #undef PRIuFAST32 #undef PRIx32 #undef PRIxLEAST32 #undef PRIxFAST32 #undef PRIX32 #undef PRIXLEAST32 #undef PRIXFAST32 #undef SCNd32 #undef SCNdLEAST32 #undef SCNdFAST32 #undef SCNi32 #undef SCNiLEAST32 #undef SCNiFAST32 #undef SCNo32 #undef SCNoLEAST32 #undef SCNoFAST32 #undef SCNu32 #undef SCNuLEAST32 #undef SCNuFAST32 #undef SCNx32 #undef SCNxLEAST32 #undef SCNxFAST32 #define PRId32 "d" #define PRIdLEAST32 "d" #define PRIdFAST32 "d" #define PRIi32 "i" #define PRIiLEAST32 "i" #define PRIiFAST32 "i" #define PRIo32 "o" #define PRIoLEAST32 "o" #define PRIoFAST32 "o" #define PRIu32 "u" #define PRIuLEAST32 "u" #define PRIuFAST32 "u" #define PRIx32 "x" #define PRIxLEAST32 "x" #define PRIxFAST32 "x" #define PRIX32 "X" #define PRIXLEAST32 "X" #define PRIXFAST32 "X" #define SCNd32 "d" #define SCNdLEAST32 "d" #define SCNdFAST32 "d" #define SCNi32 "i" #define SCNiLEAST32 "i" #define SCNiFAST32 "i" #define SCNo32 "o" #define SCNoLEAST32 "o" #define SCNoFAST32 "o" #define SCNu32 "u" #define SCNuLEAST32 "u" #define SCNuFAST32 "u" #define 
SCNx32 "x" #define SCNxLEAST32 "x" #define SCNxFAST32 "x" #endif #endif /* __CLANG_INTTYPES_H */ /*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __INVPCIDINTRIN_H #define __INVPCIDINTRIN_H static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("invpcid"))) _invpcid(unsigned int __type, void *__descriptor) { __builtin_ia32_invpcid(__type, __descriptor); } #endif /* __INVPCIDINTRIN_H */ /*===---- iso646.h - Standard header for alternate spellings of operators---=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __ISO646_H #define __ISO646_H #ifndef __cplusplus #define and && #define and_eq &= #define bitand & #define bitor | #define compl ~ #define not ! 
#define not_eq != #define or || #define or_eq |= #define xor ^ #define xor_eq ^= #endif #endif /* __ISO646_H */ /*===----------------- keylockerintrin.h - KL Intrinsics -------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef _KEYLOCKERINTRIN_H #define _KEYLOCKERINTRIN_H #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ __min_vector_width__(128))) /// Load internal wrapping key from __intkey, __enkey_lo and __enkey_hi. __ctl /// will assigned to EAX, whch specifies the KeySource and whether backing up /// the key is permitted. 
The 256-bit encryption key is loaded from the two /// explicit operands (__enkey_lo and __enkey_hi). The 128-bit integrity key is /// loaded from the implicit operand XMM0 which assigned by __intkey. /// /// \headerfile /// /// This intrinsic corresponds to the LOADIWKEY instructions. /// /// \code{.operation} /// IF CPL > 0 // LOADKWKEY only allowed at ring 0 (supervisor mode) /// GP (0) /// FI /// IF “LOADIWKEY exiting” VM execution control set /// VMexit /// FI /// IF __ctl[4:1] > 1 // Reserved KeySource encoding used /// GP (0) /// FI /// IF __ctl[31:5] != 0 // Reserved bit in __ctl is set /// GP (0) /// FI /// IF __ctl[0] AND (CPUID.19H.ECX[0] == 0) // NoBackup is not supported on this part /// GP (0) /// FI /// IF (__ctl[4:1] == 1) AND (CPUID.19H.ECX[1] == 0) // KeySource of 1 is not supported on this part /// GP (0) /// FI /// IF (__ctl[4:1] == 0) // KeySource of 0. /// IWKey.Encryption Key[127:0] := __enkey_hi[127:0]: /// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] /// IWKey.IntegrityKey[127:0] := __intkey[127:0] /// IWKey.NoBackup := __ctl[0] /// IWKey.KeySource := __ctl[4:1] /// ZF := 0 /// ELSE // KeySource of 1. See RDSEED definition for details of randomness /// IF HW_NRND_GEN.ready == 1 // Full-entropy random data from RDSEED was received /// IWKey.Encryption Key[127:0] := __enkey_hi[127:0] XOR HW_NRND_GEN.data[127:0] /// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] XOR HW_NRND_GEN.data[255:128] /// IWKey.Encryption Key[255:0] := __enkey_hi[127:0]:__enkey_lo[127:0] XOR HW_NRND_GEN.data[255:0] /// IWKey.IntegrityKey[127:0] := __intkey[127:0] XOR HW_NRND_GEN.data[383:256] /// IWKey.NoBackup := __ctl[0] /// IWKey.KeySource := __ctl[4:1] /// ZF := 0 /// ELSE // Random data was not returned from RDSEED. 
IWKey was not loaded /// ZF := 1 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _mm_loadiwkey (unsigned int __ctl, __m128i __intkey, __m128i __enkey_lo, __m128i __enkey_hi) { __builtin_ia32_loadiwkey (__intkey, __enkey_lo, __enkey_hi, __ctl); } /// Wrap a 128-bit AES key from __key into a key handle and output in /// ((__m128i*)__h) to ((__m128i*)__h) + 2 and a 32-bit value as return. /// The explicit source operand __htype specifies handle restrictions. /// /// \headerfile /// /// This intrinsic corresponds to the ENCODEKEY128 instructions. /// /// \code{.operation} /// InputKey[127:0] := __key[127:0] /// KeyMetadata[2:0] := __htype[2:0] /// KeyMetadata[23:3] := 0 // Reserved for future usage /// KeyMetadata[27:24] := 0 // KeyType is AES-128 (value of 0) /// KeyMetadata[127:28] := 0 // Reserved for future usage /// Handle[383:0] := WrapKey128(InputKey[127:0], KeyMetadata[127:0], /// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) /// dst[0] := IWKey.NoBackup /// dst[4:1] := IWKey.KeySource[3:0] /// dst[31:5] := 0 /// MEM[__h+127:__h] := Handle[127:0] // AAD /// MEM[__h+255:__h+128] := Handle[255:128] // Integrity Tag /// MEM[__h+383:__h+256] := Handle[383:256] // CipherText /// OF := 0 /// SF := 0 /// ZF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { return __builtin_ia32_encodekey128_u32(__htype, (__v2di)__key, __h); } /// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then /// output handle in ((__m128i*)__h) to ((__m128i*)__h) + 3 and /// a 32-bit value as return. /// The explicit source operand __htype specifies handle restrictions. /// /// \headerfile /// /// This intrinsic corresponds to the ENCODEKEY256 instructions. 
/// /// \code{.operation} /// InputKey[127:0] := __key_lo[127:0] /// InputKey[255:128] := __key_hi[255:128] /// KeyMetadata[2:0] := __htype[2:0] /// KeyMetadata[23:3] := 0 // Reserved for future usage /// KeyMetadata[27:24] := 1 // KeyType is AES-256 (value of 1) /// KeyMetadata[127:28] := 0 // Reserved for future usage /// Handle[511:0] := WrapKey256(InputKey[255:0], KeyMetadata[127:0], /// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) /// dst[0] := IWKey.NoBackup /// dst[4:1] := IWKey.KeySource[3:0] /// dst[31:5] := 0 /// MEM[__h+127:__h] := Handle[127:0] // AAD /// MEM[__h+255:__h+128] := Handle[255:128] // Tag /// MEM[__h+383:__h+256] := Handle[383:256] // CipherText[127:0] /// MEM[__h+511:__h+384] := Handle[511:384] // CipherText[255:128] /// OF := 0 /// SF := 0 /// ZF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, void *__h) { return __builtin_ia32_encodekey256_u32(__htype, (__v2di)__key_lo, (__v2di)__key_hi, __h); } /// The AESENC128KL performs 10 rounds of AES to encrypt the __idata using /// the 128-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENC128KL instructions. /// /// \code{.operation} /// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. 
/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[383:256] || /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// ELSE /// MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using /// the 256-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENC256KL instructions. /// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] // Load is not guaranteed to be atomic. 
/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256 ) /// IF (IllegalHandle) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using /// the 128-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDEC128KL instructions. /// /// \code{.operation} /// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. 
/// IllegalHandle := (HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[383:256] || /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128) /// IF (IllegalHandle) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC256KL performs 10 rounds of AES to decrypt the __idata using /// the 256-bit key in the handle from the __h. It stores the result in the /// __odata. And return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDEC256KL instructions. 
/// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] /// IllegalHandle := (HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[383:256] || /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256) /// IF (IllegalHandle) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 /// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } #undef __DEFAULT_FN_ATTRS #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ || defined(__KL__) */ #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__WIDEKL__) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ __min_vector_width__(128))) /// Encrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENCWIDE128KL instructions. 
/// /// \code{.operation} /// Handle := MEM[__h+383:__h] /// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } /// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESENCWIDE256KL instructions. 
/// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] /// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) /// IF (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDECWIDE128KL instructions. 
/// /// \code{.operation} /// Handle[383:0] := MEM[__h+383:__h] /// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle /// at __h and store each resultant block back from __odata to __odata+7. And /// return the affected ZF flag status. /// /// \headerfile /// /// This intrinsic corresponds to the AESDECWIDE256KL instructions. 
/// /// \code{.operation} /// Handle[511:0] := MEM[__h+511:__h] /// IllegalHandle = ( HandleReservedBitSet (Handle[511:0]) || /// (Handle[127:0] AND (CPL > 0)) || /// Handle[255:128] || /// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) /// If (IllegalHandle) /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF Authentic == 0 /// ZF := 1 /// FOR i := 0 to 7 /// __odata[i] := 0 /// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) /// ENDFOR /// ZF := 0 /// FI /// FI /// dst := ZF /// OF := 0 /// SF := 0 /// AF := 0 /// PF := 0 /// CF := 0 /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata, (const __v2di *)__idata, __h); } #undef __DEFAULT_FN_ATTRS #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ || defined(__WIDEKL__) */ #endif /* _KEYLOCKERINTRIN_H */ /*===------------ larchintrin.h - LoongArch intrinsics ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _LOONGARCH_BASE_INTRIN_H #define _LOONGARCH_BASE_INTRIN_H #ifdef __cplusplus extern "C" { #endif typedef struct rdtime { unsigned int value; unsigned int timeid; } __rdtime_t; #if __loongarch_grlen == 64 typedef struct drdtime { unsigned long dvalue; unsigned long dtimeid; } __drdtime_t; extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtime_d(void) { __drdtime_t __drdtime; __asm__ volatile( "rdtime.d %[val], %[tid]\n\t" : [val] "=&r"(__drdtime.dvalue), [tid] "=&r"(__drdtime.dtimeid)); return __drdtime; } #endif extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtimeh_w(void) { __rdtime_t __rdtime; __asm__ volatile("rdtimeh.w %[val], %[tid]\n\t" : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); return __rdtime; } extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtimel_w(void) { __rdtime_t __rdtime; __asm__ volatile("rdtimel.w %[val], %[tid]\n\t" : [val] "=&r"(__rdtime.value), [tid] "=&r"(__rdtime.timeid)); return __rdtime; } #if __loongarch_grlen == 64 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_b_w(char _1, int _2) { return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_h_w(short _1, int _2) { return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_w_w(int _1, int _2) { return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc_w_d_w(long int _1, int _2) { return (int)__builtin_loongarch_crc_w_d_w((long int)_1, 
(int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_b_w(char _1, int _2) { return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_h_w(short _1, int _2) { return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_w_w(int _1, int _2) { return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crcc_w_d_w(long int _1, int _2) { return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); } #endif #define __break(/*ui15*/ _1) __builtin_loongarch_break((_1)) #if __loongarch_grlen == 32 #define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) #endif #if __loongarch_grlen == 64 #define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) #endif #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) #define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) #define __movfcsr2gr(/*ui5*/ _1) __builtin_loongarch_movfcsr2gr((_1)); #define __movgr2fcsr(/*ui5*/ _1, _2) \ __builtin_loongarch_movgr2fcsr((_1), (unsigned int)_2); #define __syscall(/*ui15*/ _1) __builtin_loongarch_syscall((_1)) #define __csrrd_w(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd_w((_1))) #define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) #define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ (unsigned int)(_2), (_3))) #if __loongarch_grlen == 64 #define __csrrd_d(/*ui14*/ _1) \ ((unsigned long 
int)__builtin_loongarch_csrrd_d((_1))) #define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ (_2))) #define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ /*ui14*/ _3) \ ((unsigned long int)__builtin_loongarch_csrxchg_d( \ (unsigned long int)(_1), (unsigned long int)(_2), (_3))) #endif extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_b(unsigned int _1) { return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); } extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_h(unsigned int _1) { return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_w(unsigned int _1) { return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); } #if __loongarch_grlen == 64 extern __inline unsigned long int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_d(unsigned int _1) { return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); } #endif extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_b(unsigned char _1, unsigned int _2) { __builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_h(unsigned short _1, unsigned int _2) { __builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_w(unsigned int _1, unsigned int _2) { __builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __cpucfg(unsigned int _1) { return (unsigned 
int)__builtin_loongarch_cpucfg((unsigned int)_1); } #if __loongarch_grlen == 64 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrwr_d(unsigned long int _1, unsigned int _2) { __builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __asrtgt_d(long int _1, long int _2) { __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __asrtle_d(long int _1, long int _2) { __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); } #endif #if __loongarch_grlen == 64 #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ ((long int)__builtin_loongarch_lddir_d((long int)(_1), (_2))) #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) #endif #ifdef __cplusplus } #endif #endif /* _LOONGARCH_BASE_INTRIN_H */ /*===---- limits.h - Standard header for integer sizes --------------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #if !defined(__CLANG_LIMITS_H) || !defined(_LIBC_LIMITS_H_) #define __CLANG_LIMITS_H /* The system's limits.h may, in turn, try to #include_next GCC's limits.h. Avert this #include_next madness. */ #if defined __GNUC__ && !defined _GCC_LIMITS_H_ #define _GCC_LIMITS_H_ #endif /* System headers include a number of constants from POSIX in . Include it if we're hosted. */ #if __STDC_HOSTED__ && __has_include_next() #include_next #endif /* Many system headers try to "help us out" by defining these. No really, we know how big each datatype is. 
*/ #undef SCHAR_MIN #undef SCHAR_MAX #undef UCHAR_MAX #undef SHRT_MIN #undef SHRT_MAX #undef USHRT_MAX #undef INT_MIN #undef INT_MAX #undef UINT_MAX #undef LONG_MIN #undef LONG_MAX #undef ULONG_MAX #undef CHAR_BIT #undef CHAR_MIN #undef CHAR_MAX /* C90/99 5.2.4.2.1 */ #define SCHAR_MAX __SCHAR_MAX__ #define SHRT_MAX __SHRT_MAX__ #define INT_MAX __INT_MAX__ #define LONG_MAX __LONG_MAX__ #define SCHAR_MIN (-__SCHAR_MAX__-1) #define SHRT_MIN (-__SHRT_MAX__ -1) #define INT_MIN (-__INT_MAX__ -1) #define LONG_MIN (-__LONG_MAX__ -1L) #define UCHAR_MAX (__SCHAR_MAX__*2 +1) #if __SHRT_WIDTH__ < __INT_WIDTH__ #define USHRT_MAX (__SHRT_MAX__ * 2 + 1) #else #define USHRT_MAX (__SHRT_MAX__ * 2U + 1U) #endif #define UINT_MAX (__INT_MAX__ *2U +1U) #define ULONG_MAX (__LONG_MAX__ *2UL+1UL) #ifndef MB_LEN_MAX #define MB_LEN_MAX 1 #endif #define CHAR_BIT __CHAR_BIT__ /* C2x 5.2.4.2.1 */ /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L #define BOOL_WIDTH __BOOL_WIDTH__ #define CHAR_WIDTH CHAR_BIT #define SCHAR_WIDTH CHAR_BIT #define UCHAR_WIDTH CHAR_BIT #define USHRT_WIDTH __SHRT_WIDTH__ #define SHRT_WIDTH __SHRT_WIDTH__ #define UINT_WIDTH __INT_WIDTH__ #define INT_WIDTH __INT_WIDTH__ #define ULONG_WIDTH __LONG_WIDTH__ #define LONG_WIDTH __LONG_WIDTH__ #define ULLONG_WIDTH __LLONG_WIDTH__ #define LLONG_WIDTH __LLONG_WIDTH__ #define BITINT_MAXWIDTH __BITINT_MAXWIDTH__ #endif #ifdef __CHAR_UNSIGNED__ /* -funsigned-char */ #define CHAR_MIN 0 #define CHAR_MAX UCHAR_MAX #else #define CHAR_MIN SCHAR_MIN #define CHAR_MAX __SCHAR_MAX__ #endif /* C99 5.2.4.2.1: Added long long. C++11 18.3.3.2: same contents as the Standard C Library header . 
*/ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) #undef LLONG_MIN #undef LLONG_MAX #undef ULLONG_MAX #define LLONG_MAX __LONG_LONG_MAX__ #define LLONG_MIN (-__LONG_LONG_MAX__-1LL) #define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) #endif /* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad that we don't have something like #pragma poison that could be used to deprecate a macro - the code should just use LLONG_MAX and friends. */ #if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__) #undef LONG_LONG_MIN #undef LONG_LONG_MAX #undef ULONG_LONG_MAX #define LONG_LONG_MAX __LONG_LONG_MAX__ #define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL) #define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) #endif #endif /* __CLANG_LIMITS_H */ /*===---- lwpintrin.h - LWP intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __LWPINTRIN_H #define __LWPINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp"))) /// Parses the LWPCB at the specified address and enables /// profiling if valid. /// /// \headerfile /// /// This intrinsic corresponds to the LLWPCB instruction. /// /// \param __addr /// Address to the new Lightweight Profiling Control Block (LWPCB). If the /// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables /// Lightweight Profiling. 
static __inline__ void __DEFAULT_FN_ATTRS __llwpcb (void *__addr) { __builtin_ia32_llwpcb(__addr); } /// Flushes the LWP state to memory and returns the address of the LWPCB. /// /// \headerfile /// /// This intrinsic corresponds to the SLWPCB instruction. /// /// \return /// Address to the current Lightweight Profiling Control Block (LWPCB). /// If LWP is not currently enabled, returns NULL. static __inline__ void* __DEFAULT_FN_ATTRS __slwpcb (void) { return __builtin_ia32_slwpcb(); } /// Inserts programmed event record into the LWP event ring buffer /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPINS instruction. /// /// \param DATA2 /// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. /// \returns If the ring buffer is full and LWP is running in Synchronized Mode, /// the event record overwrites the last record in the buffer, the MissedEvents /// counter in the LWPCB is incremented, the head pointer is not advanced, and /// 1 is returned. Otherwise 0 is returned. #define __lwpins32(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) /// Decrements the LWP programmed value sample event counter. If the result is /// negative, inserts an event record into the LWP event ring buffer in memory /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPVAL instruction. /// /// \param DATA2 /// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. 
#define __lwpval32(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) #ifdef __x86_64__ /// Inserts programmed event record into the LWP event ring buffer /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPINS instruction. /// /// \param DATA2 /// A 64-bit value is inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. /// \returns If the ring buffer is full and LWP is running in Synchronized Mode, /// the event record overwrites the last record in the buffer, the MissedEvents /// counter in the LWPCB is incremented, the head pointer is not advanced, and /// 1 is returned. Otherwise 0 is returned. #define __lwpins64(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) /// Decrements the LWP programmed value sample event counter. If the result is /// negative, inserts an event record into the LWP event ring buffer in memory /// and advances the ring buffer pointer. /// /// \headerfile /// /// This intrinsic corresponds to the LWPVAL instruction. /// /// \param DATA2 /// A 64-bit value is and inserted into the 64-bit Data2 field. /// \param DATA1 /// A 32-bit value is inserted into the 32-bit Data1 field. /// \param FLAGS /// A 32-bit immediate value is inserted into the 32-bit Flags field. #define __lwpval64(DATA2, DATA1, FLAGS) \ (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \ (unsigned int) (FLAGS))) #endif #undef __DEFAULT_FN_ATTRS #endif /* __LWPINTRIN_H */ /*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __LZCNTINTRIN_H #define __LZCNTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) #ifndef _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 16-bit integer whose leading zeros are to be counted. /// \returns An unsigned 16-bit integer containing the number of leading zero /// bits in the operand. #define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X)) #endif // _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose leading zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of leading zero /// bits in the operand. /// \see _lzcnt_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS __lzcnt32(unsigned int __X) { return __builtin_ia32_lzcnt_u32(__X); } /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 32-bit integer whose leading zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of leading zero /// bits in the operand. /// \see __lzcnt32 static __inline__ unsigned int __DEFAULT_FN_ATTRS _lzcnt_u32(unsigned int __X) { return __builtin_ia32_lzcnt_u32(__X); } #ifdef __x86_64__ #ifndef _MSC_VER /// Counts the number of leading zero bits in the operand. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose leading zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of leading zero /// bits in the operand. /// \see _lzcnt_u64 #define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X)) #endif // _MSC_VER /// Counts the number of leading zero bits in the operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c LZCNT instruction. /// /// \param __X /// An unsigned 64-bit integer whose leading zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of leading zero /// bits in the operand. /// \see __lzcnt64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS _lzcnt_u64(unsigned long long __X) { return __builtin_ia32_lzcnt_u64(__X); } #endif #undef __DEFAULT_FN_ATTRS #endif /* __LZCNTINTRIN_H */ /*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED #include #include typedef float __v2sf __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64))) static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) _m_femms(void) { __builtin_ia32_femms(); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pavgusb(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2id(__m64 __m) { return (__m64)__builtin_ia32_pf2id((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfadd(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpeq(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpge(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfcmpgt(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmax(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmin(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfmul(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcp(__m64 __m) { return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrcpit1(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS 
_m_pfrcpit2(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrsqrt(__m64 __m) { return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfrsqrtit1(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfsub(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfsubr(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pi2fd(__m64 __m) { return (__m64)__builtin_ia32_pi2fd((__v2si)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pmulhrw(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); } /* Handle the 3dnowa instructions here. */ #undef __DEFAULT_FN_ATTRS #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64))) static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2iw(__m64 __m) { return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfnacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pfpnacc(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pi2fw(__m64 __m) { return (__m64)__builtin_ia32_pi2fw((__v2si)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pswapdsf(__m64 __m) { return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); } static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pswapdsi(__m64 __m) { return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); } #undef __DEFAULT_FN_ATTRS #endif /*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks 
-------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __MM_MALLOC_H #define __MM_MALLOC_H #include #ifdef _WIN32 #include #else #ifndef __cplusplus extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); #else // Some systems (e.g. those with GNU libc) declare posix_memalign with an // exception specifier. Via an "egregious workaround" in // Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid // redeclaration of glibc's declaration. extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size); #endif #endif #if !(defined(_WIN32) && defined(_mm_malloc)) static __inline__ void *__attribute__((__always_inline__, __nodebug__, __malloc__, __alloc_size__(1), __alloc_align__(2))) _mm_malloc(size_t __size, size_t __align) { if (__align == 1) { return malloc(__size); } if (!(__align & (__align - 1)) && __align < sizeof(void *)) __align = sizeof(void *); void *__mallocedMemory; #if defined(__MINGW32__) __mallocedMemory = __mingw_aligned_malloc(__size, __align); #elif defined(_WIN32) __mallocedMemory = _aligned_malloc(__size, __align); #else if (posix_memalign(&__mallocedMemory, __align, __size)) return 0; #endif return __mallocedMemory; } static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_free(void *__p) { #if defined(__MINGW32__) __mingw_aligned_free(__p); #elif defined(_WIN32) _aligned_free(__p); #else free(__p); #endif } #endif #endif /* __MM_MALLOC_H */ /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __MMINTRIN_H #define __MMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); typedef long long __v1di __attribute__((__vector_size__(8))); typedef int __v2si __attribute__((__vector_size__(8))); typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. /// /// \headerfile /// /// This intrinsic corresponds to the EMMS instruction. /// static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) _mm_empty(void) { __builtin_ia32_emms(); } /// Constructs a 64-bit integer vector, setting the lower 32 bits to the /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. /// /// \headerfile /// /// This intrinsic corresponds to the MOVD instruction. /// /// \param __i /// A 32-bit integer value. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the /// parameter. The upper 32 bits are set to 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i) { return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); } /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit /// signed integer. /// /// \headerfile /// /// This intrinsic corresponds to the MOVD instruction. /// /// \param __m /// A 64-bit integer vector. /// \returns A 32-bit signed integer value containing the lower 32 bits of the /// parameter. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m) { return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); } /// Casts a 64-bit signed integer value into a 64-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the MOVQ instruction. /// /// \param __i /// A 64-bit signed integer. /// \returns A 64-bit integer vector containing the same bitwise pattern as the /// parameter. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i) { return (__m64)__i; } /// Casts a 64-bit integer vector into a 64-bit signed integer value. /// /// \headerfile /// /// This intrinsic corresponds to the MOVQ instruction. /// /// \param __m /// A 64-bit integer vector. /// \returns A 64-bit signed integer containing the same bitwise pattern as the /// parameter. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m) { return (long long)__m; } /// Converts 16-bit signed integers from both 64-bit integer vector /// parameters of [4 x i16] into 8-bit signed integer values, and constructs /// a 64-bit integer vector of [8 x i8] as the result. Positive values /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 /// are saturated to 0x80. /// /// \headerfile /// /// This intrinsic corresponds to the PACKSSWB instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a /// 16-bit signed integer and is converted to an 8-bit signed integer with /// saturation. Positive values greater than 0x7F are saturated to 0x7F. /// Negative values less than 0x80 are saturated to 0x80. The converted /// [4 x i8] values are written to the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a /// 16-bit signed integer and is converted to an 8-bit signed integer with /// saturation. Positive values greater than 0x7F are saturated to 0x7F. /// Negative values less than 0x80 are saturated to 0x80. 
The converted /// [4 x i8] values are written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); } /// Converts 32-bit signed integers from both 64-bit integer vector /// parameters of [2 x i32] into 16-bit signed integer values, and constructs /// a 64-bit integer vector of [4 x i16] as the result. Positive values /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than /// 0x8000 are saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the PACKSSDW instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a /// 32-bit signed integer and is converted to a 16-bit signed integer with /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. /// Negative values less than 0x8000 are saturated to 0x8000. The converted /// [2 x i16] values are written to the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a /// 32-bit signed integer and is converted to a 16-bit signed integer with /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. /// Negative values less than 0x8000 are saturated to 0x8000. The converted /// [2 x i16] values are written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); } /// Converts 16-bit signed integers from both 64-bit integer vector /// parameters of [4 x i16] into 8-bit unsigned integer values, and /// constructs a 64-bit integer vector of [8 x i8] as the result. 
Values /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated /// to 0. /// /// \headerfile /// /// This intrinsic corresponds to the PACKUSWB instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a /// 16-bit signed integer and is converted to an 8-bit unsigned integer with /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less /// than 0 are saturated to 0. The converted [4 x i8] values are written to /// the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a /// 16-bit signed integer and is converted to an 8-bit unsigned integer with /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less /// than 0 are saturated to 0. The converted [4 x i8] values are written to /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] /// and interleaves them into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKHBW instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. \n /// Bits [39:32] are written to bits [7:0] of the result. \n /// Bits [47:40] are written to bits [23:16] of the result. \n /// Bits [55:48] are written to bits [39:32] of the result. \n /// Bits [63:56] are written to bits [55:48] of the result. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// Bits [39:32] are written to bits [15:8] of the result. \n /// Bits [47:40] are written to bits [31:24] of the result. \n /// Bits [55:48] are written to bits [47:40] of the result. \n /// Bits [63:56] are written to bits [63:56] of the result. 
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKHWD instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// Bits [47:32] are written to bits [15:0] of the result. \n /// Bits [63:48] are written to bits [47:32] of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// Bits [47:32] are written to bits [31:16] of the result. \n /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKHDQ instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to /// the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] /// and interleaves them into a 64-bit integer vector of [8 x i8]. 
/// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKLBW instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// Bits [7:0] are written to bits [7:0] of the result. \n /// Bits [15:8] are written to bits [23:16] of the result. \n /// Bits [23:16] are written to bits [39:32] of the result. \n /// Bits [31:24] are written to bits [55:48] of the result. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// Bits [7:0] are written to bits [15:8] of the result. \n /// Bits [15:8] are written to bits [31:24] of the result. \n /// Bits [23:16] are written to bits [47:40] of the result. \n /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKLWD instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// Bits [15:0] are written to bits [15:0] of the result. \n /// Bits [31:16] are written to bits [47:32] of the result. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// Bits [15:0] are written to bits [31:16] of the result. \n /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 
/// /// \headerfile /// /// This intrinsic corresponds to the PUNPCKLDQ instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to /// the lower 32 bits of the result. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); } /// Adds each 8-bit integer element of the first 64-bit integer vector /// of [8 x i8] to the corresponding 8-bit integer element of the second /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are /// packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); } /// Adds each 16-bit integer element of the first 64-bit integer vector /// of [4 x i16] to the corresponding 16-bit integer element of the second /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are /// packed into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both /// parameters. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); } /// Adds each 32-bit integer element of the first 64-bit integer vector /// of [2 x i32] to the corresponding 32-bit integer element of the second /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are /// packed into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); } /// Adds each 8-bit signed integer element of the first 64-bit integer /// vector of [8 x i8] to the corresponding 8-bit signed integer element of /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums /// of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); } /// Adds each 16-bit signed integer element of the first 64-bit integer /// vector of [4 x i16] to the corresponding 16-bit signed integer element of /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than /// 0x7FFF are saturated to 0x7FFF. 
Negative sums less than 0x8000 are /// saturated to 0x8000. The results are packed into a 64-bit integer vector /// of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums /// of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); } /// Adds each 8-bit unsigned integer element of the first 64-bit integer /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are /// saturated to 0xFF. The results are packed into a 64-bit integer vector of /// [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDUSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// unsigned sums of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); } /// Adds each 16-bit unsigned integer element of the first 64-bit integer /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element /// of the second 64-bit integer vector of [4 x i16]. Sums greater than /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit /// integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PADDUSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. 
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// unsigned sums of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit integer element of the second 64-bit integer /// vector of [8 x i8] from the corresponding 8-bit integer element of the /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results /// are packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of /// both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); } /// Subtracts each 16-bit integer element of the second 64-bit integer /// vector of [4 x i16] from the corresponding 16-bit integer element of the /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the /// results are packed into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of /// both parameters. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 32-bit integer element of the second 64-bit integer /// vector of [2 x i32] from the corresponding 32-bit integer element of the /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the /// results are packed into a 64-bit integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of /// both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); } /// Subtracts each 8-bit signed integer element of the second 64-bit /// integer vector of [8 x i8] from the corresponding 8-bit signed integer /// element of the first 64-bit integer vector of [8 x i8]. Positive results /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 /// are saturated to 0x80. The results are packed into a 64-bit integer /// vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); } /// Subtracts each 16-bit signed integer element of the second 64-bit /// integer vector of [4 x i16] from the corresponding 16-bit signed integer /// element of the first 64-bit integer vector of [4 x i16]. Positive results /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit /// integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit unsigned integer element of the second 64-bit /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer /// element of the first 64-bit integer vector of [8 x i8]. /// /// If an element of the first vector is less than the corresponding element /// of the second vector, the result is saturated to 0. The results are /// packed into a 64-bit integer vector of [8 x i8]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBUSB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); } /// Subtracts each 16-bit unsigned integer element of the second 64-bit /// integer vector of [4 x i16] from the corresponding 16-bit unsigned /// integer element of the first 64-bit integer vector of [4 x i16]. /// /// If an element of the first vector is less than the corresponding element /// of the second vector, the result is saturated to 0. The results are /// packed into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSUBUSW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16] containing the minuends. /// \param __m2 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16] and get four /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. /// The lower 32 bits of these two sums are packed into a 64-bit integer /// vector of [2 x i32]. /// /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] /// of both parameters are multiplied, and the sum of both results is written /// to bits [31:0] of the result. /// /// \headerfile /// /// This intrinsic corresponds to the PMADDWD instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of /// products of both parameters. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PMULHW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit /// integer vector of [4 x i16] by the corresponding 16-bit signed integer /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PMULLW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits /// of the products of both parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); } /// Left-shifts each 16-bit signed integer element of the first /// parameter, which is a 64-bit integer vector of [4 x i16], by the number /// of bits specified by the second parameter, which is a 64-bit integer. 
The /// lower 16 bits of the results are packed into a 64-bit integer vector of /// [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); } /// Left-shifts each 16-bit signed integer element of a 64-bit integer /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. /// The lower 16 bits of the results are packed into a 64-bit integer vector /// of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); } /// Left-shifts each 32-bit signed integer element of the first /// parameter, which is a 64-bit integer vector of [2 x i32], by the number /// of bits specified by the second parameter, which is a 64-bit integer. The /// lower 32 bits of the results are packed into a 64-bit integer vector of /// [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. 
If \a __count is greater or equal to 32, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); } /// Left-shifts each 32-bit signed integer element of a 64-bit integer /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. /// The lower 32 bits of the results are packed into a 64-bit integer vector /// of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); } /// Left-shifts the first 64-bit integer parameter by the number of bits /// specified by the second 64-bit integer parameter. The lower 64 bits of /// result are returned. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); } /// Left-shifts the first parameter, which is a 64-bit integer, by the /// number of bits specified by the second parameter, which is a 32-bit /// integer. The lower 64 bits of result are returned. /// /// \headerfile /// /// This intrinsic corresponds to the PSLLQ instruction. 
/// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count) { return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); } /// Right-shifts each 16-bit integer element of the first parameter, /// which is a 64-bit integer vector of [4 x i16], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 16-bit element. The 16-bit results are packed into a 64-bit integer /// vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector /// of [4 x i16] by the number of bits specified by a 32-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 16-bit element. The 16-bit results are packed into a 64-bit integer /// vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); } /// Right-shifts each 32-bit integer element of the first parameter, /// which is a 64-bit integer vector of [2 x i32], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 32-bit element. The 32-bit results are packed into a 64-bit integer /// vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector /// of [2 x i32] by the number of bits specified by a 32-bit integer. /// /// High-order bits are filled with the sign bit of the initial value of each /// 32-bit element. The 32-bit results are packed into a 64-bit integer /// vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRAD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); } /// Right-shifts each 16-bit integer element of the first parameter, /// which is a 64-bit integer vector of [4 x i16], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are cleared. 
The 16-bit results are packed into a 64-bit /// integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector /// of [4 x i16] by the number of bits specified by a 32-bit integer. /// /// High-order bits are cleared. The 16-bit results are packed into a 64-bit /// integer vector of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLW instruction. /// /// \param __m /// A 64-bit integer vector of [4 x i16]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); } /// Right-shifts each 32-bit integer element of the first parameter, /// which is a 64-bit integer vector of [2 x i32], by the number of bits /// specified by the second parameter, which is a 64-bit integer. /// /// High-order bits are cleared. The 32-bit results are packed into a 64-bit /// integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector /// of [2 x i32] by the number of bits specified by a 32-bit integer. /// /// High-order bits are cleared. The 32-bit results are packed into a 64-bit /// integer vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLD instruction. /// /// \param __m /// A 64-bit integer vector of [2 x i32]. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); } /// Right-shifts the first 64-bit integer parameter by the number of bits /// specified by the second 64-bit integer parameter. /// /// High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector containing the right-shifted value. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count) { return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); } /// Right-shifts the first parameter, which is a 64-bit integer, by the /// number of bits specified by the second parameter, which is a 32-bit /// integer. /// /// High-order bits are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the PSRLQ instruction. /// /// \param __m /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the right-shifted value. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count) { return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); } /// Performs a bitwise AND of two 64-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the PAND instruction. /// /// \param __m1 /// A 64-bit integer vector. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); } /// Performs a bitwise NOT of the first 64-bit integer vector, and then /// performs a bitwise AND of the intermediate result and the second 64-bit /// integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the PANDN instruction. /// /// \param __m1 /// A 64-bit integer vector. The one's complement of this parameter is used /// in the bitwise AND. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of the second /// parameter and the one's complement of the first parameter. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); } /// Performs a bitwise OR of two 64-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the POR instruction. /// /// \param __m1 /// A 64-bit integer vector. /// \param __m2 /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise OR of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); } /// Performs a bitwise exclusive OR of two 64-bit integer vectors. /// /// \headerfile /// /// This intrinsic corresponds to the PXOR instruction. /// /// \param __m1 /// A 64-bit integer vector. /// \param __m2 /// A 64-bit integer vector. 
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both /// parameters. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2) { return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of /// [8 x i8] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPEQB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of /// [4 x i16] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPEQW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of /// [2 x i32] to determine if the element of the first vector is equal to the /// corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPEQD instruction. 
/// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of /// [8 x i8] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPGTB instruction. /// /// \param __m1 /// A 64-bit integer vector of [8 x i8]. /// \param __m2 /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of /// [4 x i16] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. /// /// The comparison yields 0 for false, 0xFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPGTW instruction. /// /// \param __m1 /// A 64-bit integer vector of [4 x i16]. /// \param __m2 /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of /// [2 x i32] to determine if the element of the first vector is greater than /// the corresponding element of the second vector. 
/// /// The comparison yields 0 for false, 0xFFFFFFFF for true. /// /// \headerfile /// /// This intrinsic corresponds to the PCMPGTD instruction. /// /// \param __m1 /// A 64-bit integer vector of [2 x i32]. /// \param __m2 /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); } /// Constructs a 64-bit integer vector initialized to zero. /// /// \headerfile /// /// This intrinsic corresponds to the PXOR instruction. /// /// \returns An initialized 64-bit integer vector with all elements set to zero. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void) { return __extension__ (__m64){ 0LL }; } /// Constructs a 64-bit integer vector initialized with the specified /// 32-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i1 /// A 32-bit integer value used to initialize the upper 32 bits of the /// result. /// \param __i0 /// A 32-bit integer value used to initialize the lower 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0) { return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); } /// Constructs a 64-bit integer vector initialized with the specified /// 16-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __s3 /// A 16-bit integer value used to initialize bits [63:48] of the result. /// \param __s2 /// A 16-bit integer value used to initialize bits [47:32] of the result. /// \param __s1 /// A 16-bit integer value used to initialize bits [31:16] of the result. 
/// \param __s0 /// A 16-bit integer value used to initialize bits [15:0] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); } /// Constructs a 64-bit integer vector initialized with the specified /// 8-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b7 /// An 8-bit integer value used to initialize bits [63:56] of the result. /// \param __b6 /// An 8-bit integer value used to initialize bits [55:48] of the result. /// \param __b5 /// An 8-bit integer value used to initialize bits [47:40] of the result. /// \param __b4 /// An 8-bit integer value used to initialize bits [39:32] of the result. /// \param __b3 /// An 8-bit integer value used to initialize bits [31:24] of the result. /// \param __b2 /// An 8-bit integer value used to initialize bits [23:16] of the result. /// \param __b1 /// An 8-bit integer value used to initialize bits [15:8] of the result. /// \param __b0 /// An 8-bit integer value used to initialize bits [7:0] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7); } /// Constructs a 64-bit integer vector of [2 x i32], with each of the /// 32-bit integer vector elements set to the specified 32-bit integer /// value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i /// A 32-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [2 x i32]. 
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i) { return _mm_set_pi32(__i, __i); } /// Constructs a 64-bit integer vector of [4 x i16], with each of the /// 16-bit integer vector elements set to the specified 16-bit integer /// value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w /// A 16-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [4 x i16]. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w) { return _mm_set_pi16(__w, __w, __w, __w); } /// Constructs a 64-bit integer vector of [8 x i8], with each of the /// 8-bit integer vector elements set to the specified 8-bit integer value. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b /// An 8-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [8 x i8]. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b) { return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 32-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __i0 /// A 32-bit integer value used to initialize the lower 32 bits of the /// result. /// \param __i1 /// A 32-bit integer value used to initialize the upper 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1) { return _mm_set_pi32(__i1, __i0); } /// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 16-bit integer values. 
/// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __w0 /// A 16-bit integer value used to initialize bits [15:0] of the result. /// \param __w1 /// A 16-bit integer value used to initialize bits [31:16] of the result. /// \param __w2 /// A 16-bit integer value used to initialize bits [47:32] of the result. /// \param __w3 /// A 16-bit integer value used to initialize bits [63:48] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { return _mm_set_pi16(__w3, __w2, __w1, __w0); } /// Constructs a 64-bit integer vector, initialized in reverse order with /// the specified 8-bit integer values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __b0 /// An 8-bit integer value used to initialize bits [7:0] of the result. /// \param __b1 /// An 8-bit integer value used to initialize bits [15:8] of the result. /// \param __b2 /// An 8-bit integer value used to initialize bits [23:16] of the result. /// \param __b3 /// An 8-bit integer value used to initialize bits [31:24] of the result. /// \param __b4 /// An 8-bit integer value used to initialize bits [39:32] of the result. /// \param __b5 /// An 8-bit integer value used to initialize bits [47:40] of the result. /// \param __b6 /// An 8-bit integer value used to initialize bits [55:48] of the result. /// \param __b7 /// An 8-bit integer value used to initialize bits [63:56] of the result. /// \returns An initialized 64-bit integer vector. static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7) { return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } #undef __DEFAULT_FN_ATTRS /* Aliases for compatibility. 
*/ #define _m_empty _mm_empty #define _m_from_int _mm_cvtsi32_si64 #define _m_from_int64 _mm_cvtsi64_m64 #define _m_to_int _mm_cvtsi64_si32 #define _m_to_int64 _mm_cvtm64_si64 #define _m_packsswb _mm_packs_pi16 #define _m_packssdw _mm_packs_pi32 #define _m_packuswb _mm_packs_pu16 #define _m_punpckhbw _mm_unpackhi_pi8 #define _m_punpckhwd _mm_unpackhi_pi16 #define _m_punpckhdq _mm_unpackhi_pi32 #define _m_punpcklbw _mm_unpacklo_pi8 #define _m_punpcklwd _mm_unpacklo_pi16 #define _m_punpckldq _mm_unpacklo_pi32 #define _m_paddb _mm_add_pi8 #define _m_paddw _mm_add_pi16 #define _m_paddd _mm_add_pi32 #define _m_paddsb _mm_adds_pi8 #define _m_paddsw _mm_adds_pi16 #define _m_paddusb _mm_adds_pu8 #define _m_paddusw _mm_adds_pu16 #define _m_psubb _mm_sub_pi8 #define _m_psubw _mm_sub_pi16 #define _m_psubd _mm_sub_pi32 #define _m_psubsb _mm_subs_pi8 #define _m_psubsw _mm_subs_pi16 #define _m_psubusb _mm_subs_pu8 #define _m_psubusw _mm_subs_pu16 #define _m_pmaddwd _mm_madd_pi16 #define _m_pmulhw _mm_mulhi_pi16 #define _m_pmullw _mm_mullo_pi16 #define _m_psllw _mm_sll_pi16 #define _m_psllwi _mm_slli_pi16 #define _m_pslld _mm_sll_pi32 #define _m_pslldi _mm_slli_pi32 #define _m_psllq _mm_sll_si64 #define _m_psllqi _mm_slli_si64 #define _m_psraw _mm_sra_pi16 #define _m_psrawi _mm_srai_pi16 #define _m_psrad _mm_sra_pi32 #define _m_psradi _mm_srai_pi32 #define _m_psrlw _mm_srl_pi16 #define _m_psrlwi _mm_srli_pi16 #define _m_psrld _mm_srl_pi32 #define _m_psrldi _mm_srli_pi32 #define _m_psrlq _mm_srl_si64 #define _m_psrlqi _mm_srli_si64 #define _m_pand _mm_and_si64 #define _m_pandn _mm_andnot_si64 #define _m_por _mm_or_si64 #define _m_pxor _mm_xor_si64 #define _m_pcmpeqb _mm_cmpeq_pi8 #define _m_pcmpeqw _mm_cmpeq_pi16 #define _m_pcmpeqd _mm_cmpeq_pi32 #define _m_pcmpgtb _mm_cmpgt_pi8 #define _m_pcmpgtw _mm_cmpgt_pi16 #define _m_pcmpgtd _mm_cmpgt_pi32 #endif /* __MMINTRIN_H */ /*===------------------------- movdirintrin.h ------------------------------=== * * Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef _MOVDIRINTRIN_H #define _MOVDIRINTRIN_H /* Move doubleword as direct store */ static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) _directstoreu_u32 (void *__dst, unsigned int __value) { __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value); } #ifdef __x86_64__ /* Move quadword as direct store */ static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) _directstoreu_u64 (void *__dst, unsigned long __value) { __builtin_ia32_directstore_u64((unsigned long *)__dst, __value); } #endif /* __x86_64__ */ /* * movdir64b - Move 64 bytes as direct store. * The destination must be 64 byte aligned, and the store is atomic. * The source address has no alignment requirement, and the load from * the source address is not atomic. */ static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movdir64b"))) _movdir64b (void *__dst __attribute__((align_value(64))), const void *__src) { __builtin_ia32_movdir64b(__dst, __src); } #endif /* _MOVDIRINTRIN_H */ /*===---- msa.h - MIPS MSA intrinsics --------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef _MSA_H #define _MSA_H 1 #if defined(__mips_msa) typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); typedef short v8i16 __attribute__((vector_size(16), aligned(16))); typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); typedef int v4i32 __attribute__((vector_size(16), aligned(16))); typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); typedef float v4f32 __attribute__((vector_size(16), aligned(16))); typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); #define __msa_sll_b __builtin_msa_sll_b #define __msa_sll_h __builtin_msa_sll_h #define __msa_sll_w __builtin_msa_sll_w #define __msa_sll_d __builtin_msa_sll_d #define __msa_slli_b __builtin_msa_slli_b #define __msa_slli_h __builtin_msa_slli_h #define __msa_slli_w __builtin_msa_slli_w #define __msa_slli_d __builtin_msa_slli_d #define __msa_sra_b 
__builtin_msa_sra_b #define __msa_sra_h __builtin_msa_sra_h #define __msa_sra_w __builtin_msa_sra_w #define __msa_sra_d __builtin_msa_sra_d #define __msa_srai_b __builtin_msa_srai_b #define __msa_srai_h __builtin_msa_srai_h #define __msa_srai_w __builtin_msa_srai_w #define __msa_srai_d __builtin_msa_srai_d #define __msa_srar_b __builtin_msa_srar_b #define __msa_srar_h __builtin_msa_srar_h #define __msa_srar_w __builtin_msa_srar_w #define __msa_srar_d __builtin_msa_srar_d #define __msa_srari_b __builtin_msa_srari_b #define __msa_srari_h __builtin_msa_srari_h #define __msa_srari_w __builtin_msa_srari_w #define __msa_srari_d __builtin_msa_srari_d #define __msa_srl_b __builtin_msa_srl_b #define __msa_srl_h __builtin_msa_srl_h #define __msa_srl_w __builtin_msa_srl_w #define __msa_srl_d __builtin_msa_srl_d #define __msa_srli_b __builtin_msa_srli_b #define __msa_srli_h __builtin_msa_srli_h #define __msa_srli_w __builtin_msa_srli_w #define __msa_srli_d __builtin_msa_srli_d #define __msa_srlr_b __builtin_msa_srlr_b #define __msa_srlr_h __builtin_msa_srlr_h #define __msa_srlr_w __builtin_msa_srlr_w #define __msa_srlr_d __builtin_msa_srlr_d #define __msa_srlri_b __builtin_msa_srlri_b #define __msa_srlri_h __builtin_msa_srlri_h #define __msa_srlri_w __builtin_msa_srlri_w #define __msa_srlri_d __builtin_msa_srlri_d #define __msa_bclr_b __builtin_msa_bclr_b #define __msa_bclr_h __builtin_msa_bclr_h #define __msa_bclr_w __builtin_msa_bclr_w #define __msa_bclr_d __builtin_msa_bclr_d #define __msa_bclri_b __builtin_msa_bclri_b #define __msa_bclri_h __builtin_msa_bclri_h #define __msa_bclri_w __builtin_msa_bclri_w #define __msa_bclri_d __builtin_msa_bclri_d #define __msa_bset_b __builtin_msa_bset_b #define __msa_bset_h __builtin_msa_bset_h #define __msa_bset_w __builtin_msa_bset_w #define __msa_bset_d __builtin_msa_bset_d #define __msa_bseti_b __builtin_msa_bseti_b #define __msa_bseti_h __builtin_msa_bseti_h #define __msa_bseti_w __builtin_msa_bseti_w #define __msa_bseti_d 
__builtin_msa_bseti_d #define __msa_bneg_b __builtin_msa_bneg_b #define __msa_bneg_h __builtin_msa_bneg_h #define __msa_bneg_w __builtin_msa_bneg_w #define __msa_bneg_d __builtin_msa_bneg_d #define __msa_bnegi_b __builtin_msa_bnegi_b #define __msa_bnegi_h __builtin_msa_bnegi_h #define __msa_bnegi_w __builtin_msa_bnegi_w #define __msa_bnegi_d __builtin_msa_bnegi_d #define __msa_binsl_b __builtin_msa_binsl_b #define __msa_binsl_h __builtin_msa_binsl_h #define __msa_binsl_w __builtin_msa_binsl_w #define __msa_binsl_d __builtin_msa_binsl_d #define __msa_binsli_b __builtin_msa_binsli_b #define __msa_binsli_h __builtin_msa_binsli_h #define __msa_binsli_w __builtin_msa_binsli_w #define __msa_binsli_d __builtin_msa_binsli_d #define __msa_binsr_b __builtin_msa_binsr_b #define __msa_binsr_h __builtin_msa_binsr_h #define __msa_binsr_w __builtin_msa_binsr_w #define __msa_binsr_d __builtin_msa_binsr_d #define __msa_binsri_b __builtin_msa_binsri_b #define __msa_binsri_h __builtin_msa_binsri_h #define __msa_binsri_w __builtin_msa_binsri_w #define __msa_binsri_d __builtin_msa_binsri_d #define __msa_addv_b __builtin_msa_addv_b #define __msa_addv_h __builtin_msa_addv_h #define __msa_addv_w __builtin_msa_addv_w #define __msa_addv_d __builtin_msa_addv_d #define __msa_addvi_b __builtin_msa_addvi_b #define __msa_addvi_h __builtin_msa_addvi_h #define __msa_addvi_w __builtin_msa_addvi_w #define __msa_addvi_d __builtin_msa_addvi_d #define __msa_subv_b __builtin_msa_subv_b #define __msa_subv_h __builtin_msa_subv_h #define __msa_subv_w __builtin_msa_subv_w #define __msa_subv_d __builtin_msa_subv_d #define __msa_subvi_b __builtin_msa_subvi_b #define __msa_subvi_h __builtin_msa_subvi_h #define __msa_subvi_w __builtin_msa_subvi_w #define __msa_subvi_d __builtin_msa_subvi_d #define __msa_max_s_b __builtin_msa_max_s_b #define __msa_max_s_h __builtin_msa_max_s_h #define __msa_max_s_w __builtin_msa_max_s_w #define __msa_max_s_d __builtin_msa_max_s_d #define __msa_maxi_s_b __builtin_msa_maxi_s_b 
#define __msa_maxi_s_h __builtin_msa_maxi_s_h #define __msa_maxi_s_w __builtin_msa_maxi_s_w #define __msa_maxi_s_d __builtin_msa_maxi_s_d #define __msa_max_u_b __builtin_msa_max_u_b #define __msa_max_u_h __builtin_msa_max_u_h #define __msa_max_u_w __builtin_msa_max_u_w #define __msa_max_u_d __builtin_msa_max_u_d #define __msa_maxi_u_b __builtin_msa_maxi_u_b #define __msa_maxi_u_h __builtin_msa_maxi_u_h #define __msa_maxi_u_w __builtin_msa_maxi_u_w #define __msa_maxi_u_d __builtin_msa_maxi_u_d #define __msa_min_s_b __builtin_msa_min_s_b #define __msa_min_s_h __builtin_msa_min_s_h #define __msa_min_s_w __builtin_msa_min_s_w #define __msa_min_s_d __builtin_msa_min_s_d #define __msa_mini_s_b __builtin_msa_mini_s_b #define __msa_mini_s_h __builtin_msa_mini_s_h #define __msa_mini_s_w __builtin_msa_mini_s_w #define __msa_mini_s_d __builtin_msa_mini_s_d #define __msa_min_u_b __builtin_msa_min_u_b #define __msa_min_u_h __builtin_msa_min_u_h #define __msa_min_u_w __builtin_msa_min_u_w #define __msa_min_u_d __builtin_msa_min_u_d #define __msa_mini_u_b __builtin_msa_mini_u_b #define __msa_mini_u_h __builtin_msa_mini_u_h #define __msa_mini_u_w __builtin_msa_mini_u_w #define __msa_mini_u_d __builtin_msa_mini_u_d #define __msa_max_a_b __builtin_msa_max_a_b #define __msa_max_a_h __builtin_msa_max_a_h #define __msa_max_a_w __builtin_msa_max_a_w #define __msa_max_a_d __builtin_msa_max_a_d #define __msa_min_a_b __builtin_msa_min_a_b #define __msa_min_a_h __builtin_msa_min_a_h #define __msa_min_a_w __builtin_msa_min_a_w #define __msa_min_a_d __builtin_msa_min_a_d #define __msa_ceq_b __builtin_msa_ceq_b #define __msa_ceq_h __builtin_msa_ceq_h #define __msa_ceq_w __builtin_msa_ceq_w #define __msa_ceq_d __builtin_msa_ceq_d #define __msa_ceqi_b __builtin_msa_ceqi_b #define __msa_ceqi_h __builtin_msa_ceqi_h #define __msa_ceqi_w __builtin_msa_ceqi_w #define __msa_ceqi_d __builtin_msa_ceqi_d #define __msa_clt_s_b __builtin_msa_clt_s_b #define __msa_clt_s_h __builtin_msa_clt_s_h #define 
__msa_clt_s_w __builtin_msa_clt_s_w #define __msa_clt_s_d __builtin_msa_clt_s_d #define __msa_clti_s_b __builtin_msa_clti_s_b #define __msa_clti_s_h __builtin_msa_clti_s_h #define __msa_clti_s_w __builtin_msa_clti_s_w #define __msa_clti_s_d __builtin_msa_clti_s_d #define __msa_clt_u_b __builtin_msa_clt_u_b #define __msa_clt_u_h __builtin_msa_clt_u_h #define __msa_clt_u_w __builtin_msa_clt_u_w #define __msa_clt_u_d __builtin_msa_clt_u_d #define __msa_clti_u_b __builtin_msa_clti_u_b #define __msa_clti_u_h __builtin_msa_clti_u_h #define __msa_clti_u_w __builtin_msa_clti_u_w #define __msa_clti_u_d __builtin_msa_clti_u_d #define __msa_cle_s_b __builtin_msa_cle_s_b #define __msa_cle_s_h __builtin_msa_cle_s_h #define __msa_cle_s_w __builtin_msa_cle_s_w #define __msa_cle_s_d __builtin_msa_cle_s_d #define __msa_clei_s_b __builtin_msa_clei_s_b #define __msa_clei_s_h __builtin_msa_clei_s_h #define __msa_clei_s_w __builtin_msa_clei_s_w #define __msa_clei_s_d __builtin_msa_clei_s_d #define __msa_cle_u_b __builtin_msa_cle_u_b #define __msa_cle_u_h __builtin_msa_cle_u_h #define __msa_cle_u_w __builtin_msa_cle_u_w #define __msa_cle_u_d __builtin_msa_cle_u_d #define __msa_clei_u_b __builtin_msa_clei_u_b #define __msa_clei_u_h __builtin_msa_clei_u_h #define __msa_clei_u_w __builtin_msa_clei_u_w #define __msa_clei_u_d __builtin_msa_clei_u_d #define __msa_ld_b __builtin_msa_ld_b #define __msa_ld_h __builtin_msa_ld_h #define __msa_ld_w __builtin_msa_ld_w #define __msa_ld_d __builtin_msa_ld_d #define __msa_ldr_d __builtin_msa_ldr_d #define __msa_ldr_w __builtin_msa_ldrq_w #define __msa_st_b __builtin_msa_st_b #define __msa_st_h __builtin_msa_st_h #define __msa_st_w __builtin_msa_st_w #define __msa_st_d __builtin_msa_st_d #define __msa_str_d __builtin_msa_str_d #define __msa_str_w __builtin_msa_strq_w #define __msa_sat_s_b __builtin_msa_sat_s_b #define __msa_sat_s_h __builtin_msa_sat_s_h #define __msa_sat_s_w __builtin_msa_sat_s_w #define __msa_sat_s_d __builtin_msa_sat_s_d #define 
__msa_sat_u_b __builtin_msa_sat_u_b #define __msa_sat_u_h __builtin_msa_sat_u_h #define __msa_sat_u_w __builtin_msa_sat_u_w #define __msa_sat_u_d __builtin_msa_sat_u_d #define __msa_add_a_b __builtin_msa_add_a_b #define __msa_add_a_h __builtin_msa_add_a_h #define __msa_add_a_w __builtin_msa_add_a_w #define __msa_add_a_d __builtin_msa_add_a_d #define __msa_adds_a_b __builtin_msa_adds_a_b #define __msa_adds_a_h __builtin_msa_adds_a_h #define __msa_adds_a_w __builtin_msa_adds_a_w #define __msa_adds_a_d __builtin_msa_adds_a_d #define __msa_adds_s_b __builtin_msa_adds_s_b #define __msa_adds_s_h __builtin_msa_adds_s_h #define __msa_adds_s_w __builtin_msa_adds_s_w #define __msa_adds_s_d __builtin_msa_adds_s_d #define __msa_adds_u_b __builtin_msa_adds_u_b #define __msa_adds_u_h __builtin_msa_adds_u_h #define __msa_adds_u_w __builtin_msa_adds_u_w #define __msa_adds_u_d __builtin_msa_adds_u_d #define __msa_ave_s_b __builtin_msa_ave_s_b #define __msa_ave_s_h __builtin_msa_ave_s_h #define __msa_ave_s_w __builtin_msa_ave_s_w #define __msa_ave_s_d __builtin_msa_ave_s_d #define __msa_ave_u_b __builtin_msa_ave_u_b #define __msa_ave_u_h __builtin_msa_ave_u_h #define __msa_ave_u_w __builtin_msa_ave_u_w #define __msa_ave_u_d __builtin_msa_ave_u_d #define __msa_aver_s_b __builtin_msa_aver_s_b #define __msa_aver_s_h __builtin_msa_aver_s_h #define __msa_aver_s_w __builtin_msa_aver_s_w #define __msa_aver_s_d __builtin_msa_aver_s_d #define __msa_aver_u_b __builtin_msa_aver_u_b #define __msa_aver_u_h __builtin_msa_aver_u_h #define __msa_aver_u_w __builtin_msa_aver_u_w #define __msa_aver_u_d __builtin_msa_aver_u_d #define __msa_subs_s_b __builtin_msa_subs_s_b #define __msa_subs_s_h __builtin_msa_subs_s_h #define __msa_subs_s_w __builtin_msa_subs_s_w #define __msa_subs_s_d __builtin_msa_subs_s_d #define __msa_subs_u_b __builtin_msa_subs_u_b #define __msa_subs_u_h __builtin_msa_subs_u_h #define __msa_subs_u_w __builtin_msa_subs_u_w #define __msa_subs_u_d __builtin_msa_subs_u_d #define 
__msa_subsuu_s_b __builtin_msa_subsuu_s_b #define __msa_subsuu_s_h __builtin_msa_subsuu_s_h #define __msa_subsuu_s_w __builtin_msa_subsuu_s_w #define __msa_subsuu_s_d __builtin_msa_subsuu_s_d #define __msa_subsus_u_b __builtin_msa_subsus_u_b #define __msa_subsus_u_h __builtin_msa_subsus_u_h #define __msa_subsus_u_w __builtin_msa_subsus_u_w #define __msa_subsus_u_d __builtin_msa_subsus_u_d #define __msa_asub_s_b __builtin_msa_asub_s_b #define __msa_asub_s_h __builtin_msa_asub_s_h #define __msa_asub_s_w __builtin_msa_asub_s_w #define __msa_asub_s_d __builtin_msa_asub_s_d #define __msa_asub_u_b __builtin_msa_asub_u_b #define __msa_asub_u_h __builtin_msa_asub_u_h #define __msa_asub_u_w __builtin_msa_asub_u_w #define __msa_asub_u_d __builtin_msa_asub_u_d #define __msa_mulv_b __builtin_msa_mulv_b #define __msa_mulv_h __builtin_msa_mulv_h #define __msa_mulv_w __builtin_msa_mulv_w #define __msa_mulv_d __builtin_msa_mulv_d #define __msa_maddv_b __builtin_msa_maddv_b #define __msa_maddv_h __builtin_msa_maddv_h #define __msa_maddv_w __builtin_msa_maddv_w #define __msa_maddv_d __builtin_msa_maddv_d #define __msa_msubv_b __builtin_msa_msubv_b #define __msa_msubv_h __builtin_msa_msubv_h #define __msa_msubv_w __builtin_msa_msubv_w #define __msa_msubv_d __builtin_msa_msubv_d #define __msa_div_s_b __builtin_msa_div_s_b #define __msa_div_s_h __builtin_msa_div_s_h #define __msa_div_s_w __builtin_msa_div_s_w #define __msa_div_s_d __builtin_msa_div_s_d #define __msa_div_u_b __builtin_msa_div_u_b #define __msa_div_u_h __builtin_msa_div_u_h #define __msa_div_u_w __builtin_msa_div_u_w #define __msa_div_u_d __builtin_msa_div_u_d #define __msa_hadd_s_h __builtin_msa_hadd_s_h #define __msa_hadd_s_w __builtin_msa_hadd_s_w #define __msa_hadd_s_d __builtin_msa_hadd_s_d #define __msa_hadd_u_h __builtin_msa_hadd_u_h #define __msa_hadd_u_w __builtin_msa_hadd_u_w #define __msa_hadd_u_d __builtin_msa_hadd_u_d #define __msa_hsub_s_h __builtin_msa_hsub_s_h #define __msa_hsub_s_w __builtin_msa_hsub_s_w 
#define __msa_hsub_s_d __builtin_msa_hsub_s_d #define __msa_hsub_u_h __builtin_msa_hsub_u_h #define __msa_hsub_u_w __builtin_msa_hsub_u_w #define __msa_hsub_u_d __builtin_msa_hsub_u_d #define __msa_mod_s_b __builtin_msa_mod_s_b #define __msa_mod_s_h __builtin_msa_mod_s_h #define __msa_mod_s_w __builtin_msa_mod_s_w #define __msa_mod_s_d __builtin_msa_mod_s_d #define __msa_mod_u_b __builtin_msa_mod_u_b #define __msa_mod_u_h __builtin_msa_mod_u_h #define __msa_mod_u_w __builtin_msa_mod_u_w #define __msa_mod_u_d __builtin_msa_mod_u_d #define __msa_dotp_s_h __builtin_msa_dotp_s_h #define __msa_dotp_s_w __builtin_msa_dotp_s_w #define __msa_dotp_s_d __builtin_msa_dotp_s_d #define __msa_dotp_u_h __builtin_msa_dotp_u_h #define __msa_dotp_u_w __builtin_msa_dotp_u_w #define __msa_dotp_u_d __builtin_msa_dotp_u_d #define __msa_dpadd_s_h __builtin_msa_dpadd_s_h #define __msa_dpadd_s_w __builtin_msa_dpadd_s_w #define __msa_dpadd_s_d __builtin_msa_dpadd_s_d #define __msa_dpadd_u_h __builtin_msa_dpadd_u_h #define __msa_dpadd_u_w __builtin_msa_dpadd_u_w #define __msa_dpadd_u_d __builtin_msa_dpadd_u_d #define __msa_dpsub_s_h __builtin_msa_dpsub_s_h #define __msa_dpsub_s_w __builtin_msa_dpsub_s_w #define __msa_dpsub_s_d __builtin_msa_dpsub_s_d #define __msa_dpsub_u_h __builtin_msa_dpsub_u_h #define __msa_dpsub_u_w __builtin_msa_dpsub_u_w #define __msa_dpsub_u_d __builtin_msa_dpsub_u_d #define __msa_sld_b __builtin_msa_sld_b #define __msa_sld_h __builtin_msa_sld_h #define __msa_sld_w __builtin_msa_sld_w #define __msa_sld_d __builtin_msa_sld_d #define __msa_sldi_b __builtin_msa_sldi_b #define __msa_sldi_h __builtin_msa_sldi_h #define __msa_sldi_w __builtin_msa_sldi_w #define __msa_sldi_d __builtin_msa_sldi_d #define __msa_splat_b __builtin_msa_splat_b #define __msa_splat_h __builtin_msa_splat_h #define __msa_splat_w __builtin_msa_splat_w #define __msa_splat_d __builtin_msa_splat_d #define __msa_splati_b __builtin_msa_splati_b #define __msa_splati_h __builtin_msa_splati_h #define 
__msa_splati_w __builtin_msa_splati_w #define __msa_splati_d __builtin_msa_splati_d #define __msa_pckev_b __builtin_msa_pckev_b #define __msa_pckev_h __builtin_msa_pckev_h #define __msa_pckev_w __builtin_msa_pckev_w #define __msa_pckev_d __builtin_msa_pckev_d #define __msa_pckod_b __builtin_msa_pckod_b #define __msa_pckod_h __builtin_msa_pckod_h #define __msa_pckod_w __builtin_msa_pckod_w #define __msa_pckod_d __builtin_msa_pckod_d #define __msa_ilvl_b __builtin_msa_ilvl_b #define __msa_ilvl_h __builtin_msa_ilvl_h #define __msa_ilvl_w __builtin_msa_ilvl_w #define __msa_ilvl_d __builtin_msa_ilvl_d #define __msa_ilvr_b __builtin_msa_ilvr_b #define __msa_ilvr_h __builtin_msa_ilvr_h #define __msa_ilvr_w __builtin_msa_ilvr_w #define __msa_ilvr_d __builtin_msa_ilvr_d #define __msa_ilvev_b __builtin_msa_ilvev_b #define __msa_ilvev_h __builtin_msa_ilvev_h #define __msa_ilvev_w __builtin_msa_ilvev_w #define __msa_ilvev_d __builtin_msa_ilvev_d #define __msa_ilvod_b __builtin_msa_ilvod_b #define __msa_ilvod_h __builtin_msa_ilvod_h #define __msa_ilvod_w __builtin_msa_ilvod_w #define __msa_ilvod_d __builtin_msa_ilvod_d #define __msa_vshf_b __builtin_msa_vshf_b #define __msa_vshf_h __builtin_msa_vshf_h #define __msa_vshf_w __builtin_msa_vshf_w #define __msa_vshf_d __builtin_msa_vshf_d #define __msa_and_v __builtin_msa_and_v #define __msa_andi_b __builtin_msa_andi_b #define __msa_or_v __builtin_msa_or_v #define __msa_ori_b __builtin_msa_ori_b #define __msa_nor_v __builtin_msa_nor_v #define __msa_nori_b __builtin_msa_nori_b #define __msa_xor_v __builtin_msa_xor_v #define __msa_xori_b __builtin_msa_xori_b #define __msa_bmnz_v __builtin_msa_bmnz_v #define __msa_bmnzi_b __builtin_msa_bmnzi_b #define __msa_bmz_v __builtin_msa_bmz_v #define __msa_bmzi_b __builtin_msa_bmzi_b #define __msa_bsel_v __builtin_msa_bsel_v #define __msa_bseli_b __builtin_msa_bseli_b #define __msa_shf_b __builtin_msa_shf_b #define __msa_shf_h __builtin_msa_shf_h #define __msa_shf_w __builtin_msa_shf_w #define 
__msa_test_bnz_v __builtin_msa_bnz_v #define __msa_test_bz_v __builtin_msa_bz_v #define __msa_fill_b __builtin_msa_fill_b #define __msa_fill_h __builtin_msa_fill_h #define __msa_fill_w __builtin_msa_fill_w #define __msa_fill_d __builtin_msa_fill_d #define __msa_pcnt_b __builtin_msa_pcnt_b #define __msa_pcnt_h __builtin_msa_pcnt_h #define __msa_pcnt_w __builtin_msa_pcnt_w #define __msa_pcnt_d __builtin_msa_pcnt_d #define __msa_nloc_b __builtin_msa_nloc_b #define __msa_nloc_h __builtin_msa_nloc_h #define __msa_nloc_w __builtin_msa_nloc_w #define __msa_nloc_d __builtin_msa_nloc_d #define __msa_nlzc_b __builtin_msa_nlzc_b #define __msa_nlzc_h __builtin_msa_nlzc_h #define __msa_nlzc_w __builtin_msa_nlzc_w #define __msa_nlzc_d __builtin_msa_nlzc_d #define __msa_copy_s_b __builtin_msa_copy_s_b #define __msa_copy_s_h __builtin_msa_copy_s_h #define __msa_copy_s_w __builtin_msa_copy_s_w #define __msa_copy_s_d __builtin_msa_copy_s_d #define __msa_copy_u_b __builtin_msa_copy_u_b #define __msa_copy_u_h __builtin_msa_copy_u_h #define __msa_copy_u_w __builtin_msa_copy_u_w #define __msa_copy_u_d __builtin_msa_copy_u_d #define __msa_insert_b __builtin_msa_insert_b #define __msa_insert_h __builtin_msa_insert_h #define __msa_insert_w __builtin_msa_insert_w #define __msa_insert_d __builtin_msa_insert_d #define __msa_insve_b __builtin_msa_insve_b #define __msa_insve_h __builtin_msa_insve_h #define __msa_insve_w __builtin_msa_insve_w #define __msa_insve_d __builtin_msa_insve_d #define __msa_test_bnz_b __builtin_msa_bnz_b #define __msa_test_bnz_h __builtin_msa_bnz_h #define __msa_test_bnz_w __builtin_msa_bnz_w #define __msa_test_bnz_d __builtin_msa_bnz_d #define __msa_test_bz_b __builtin_msa_bz_b #define __msa_test_bz_h __builtin_msa_bz_h #define __msa_test_bz_w __builtin_msa_bz_w #define __msa_test_bz_d __builtin_msa_bz_d #define __msa_ldi_b __builtin_msa_ldi_b #define __msa_ldi_h __builtin_msa_ldi_h #define __msa_ldi_w __builtin_msa_ldi_w #define __msa_ldi_d __builtin_msa_ldi_d #define 
__msa_fcaf_w __builtin_msa_fcaf_w #define __msa_fcaf_d __builtin_msa_fcaf_d #define __msa_fcor_w __builtin_msa_fcor_w #define __msa_fcor_d __builtin_msa_fcor_d #define __msa_fcun_w __builtin_msa_fcun_w #define __msa_fcun_d __builtin_msa_fcun_d #define __msa_fcune_w __builtin_msa_fcune_w #define __msa_fcune_d __builtin_msa_fcune_d #define __msa_fcueq_w __builtin_msa_fcueq_w #define __msa_fcueq_d __builtin_msa_fcueq_d #define __msa_fceq_w __builtin_msa_fceq_w #define __msa_fceq_d __builtin_msa_fceq_d #define __msa_fcne_w __builtin_msa_fcne_w #define __msa_fcne_d __builtin_msa_fcne_d #define __msa_fclt_w __builtin_msa_fclt_w #define __msa_fclt_d __builtin_msa_fclt_d #define __msa_fcult_w __builtin_msa_fcult_w #define __msa_fcult_d __builtin_msa_fcult_d #define __msa_fcle_w __builtin_msa_fcle_w #define __msa_fcle_d __builtin_msa_fcle_d #define __msa_fcule_w __builtin_msa_fcule_w #define __msa_fcule_d __builtin_msa_fcule_d #define __msa_fsaf_w __builtin_msa_fsaf_w #define __msa_fsaf_d __builtin_msa_fsaf_d #define __msa_fsor_w __builtin_msa_fsor_w #define __msa_fsor_d __builtin_msa_fsor_d #define __msa_fsun_w __builtin_msa_fsun_w #define __msa_fsun_d __builtin_msa_fsun_d #define __msa_fsune_w __builtin_msa_fsune_w #define __msa_fsune_d __builtin_msa_fsune_d #define __msa_fsueq_w __builtin_msa_fsueq_w #define __msa_fsueq_d __builtin_msa_fsueq_d #define __msa_fseq_w __builtin_msa_fseq_w #define __msa_fseq_d __builtin_msa_fseq_d #define __msa_fsne_w __builtin_msa_fsne_w #define __msa_fsne_d __builtin_msa_fsne_d #define __msa_fslt_w __builtin_msa_fslt_w #define __msa_fslt_d __builtin_msa_fslt_d #define __msa_fsult_w __builtin_msa_fsult_w #define __msa_fsult_d __builtin_msa_fsult_d #define __msa_fsle_w __builtin_msa_fsle_w #define __msa_fsle_d __builtin_msa_fsle_d #define __msa_fsule_w __builtin_msa_fsule_w #define __msa_fsule_d __builtin_msa_fsule_d #define __msa_fadd_w __builtin_msa_fadd_w #define __msa_fadd_d __builtin_msa_fadd_d #define __msa_fsub_w __builtin_msa_fsub_w 
#define __msa_fsub_d __builtin_msa_fsub_d #define __msa_fmul_w __builtin_msa_fmul_w #define __msa_fmul_d __builtin_msa_fmul_d #define __msa_fdiv_w __builtin_msa_fdiv_w #define __msa_fdiv_d __builtin_msa_fdiv_d #define __msa_fmadd_w __builtin_msa_fmadd_w #define __msa_fmadd_d __builtin_msa_fmadd_d #define __msa_fmsub_w __builtin_msa_fmsub_w #define __msa_fmsub_d __builtin_msa_fmsub_d #define __msa_fexp2_w __builtin_msa_fexp2_w #define __msa_fexp2_d __builtin_msa_fexp2_d #define __msa_fexdo_h __builtin_msa_fexdo_h #define __msa_fexdo_w __builtin_msa_fexdo_w #define __msa_ftq_h __builtin_msa_ftq_h #define __msa_ftq_w __builtin_msa_ftq_w #define __msa_fmin_w __builtin_msa_fmin_w #define __msa_fmin_d __builtin_msa_fmin_d #define __msa_fmin_a_w __builtin_msa_fmin_a_w #define __msa_fmin_a_d __builtin_msa_fmin_a_d #define __msa_fmax_w __builtin_msa_fmax_w #define __msa_fmax_d __builtin_msa_fmax_d #define __msa_fmax_a_w __builtin_msa_fmax_a_w #define __msa_fmax_a_d __builtin_msa_fmax_a_d #define __msa_mul_q_h __builtin_msa_mul_q_h #define __msa_mul_q_w __builtin_msa_mul_q_w #define __msa_mulr_q_h __builtin_msa_mulr_q_h #define __msa_mulr_q_w __builtin_msa_mulr_q_w #define __msa_madd_q_h __builtin_msa_madd_q_h #define __msa_madd_q_w __builtin_msa_madd_q_w #define __msa_maddr_q_h __builtin_msa_maddr_q_h #define __msa_maddr_q_w __builtin_msa_maddr_q_w #define __msa_msub_q_h __builtin_msa_msub_q_h #define __msa_msub_q_w __builtin_msa_msub_q_w #define __msa_msubr_q_h __builtin_msa_msubr_q_h #define __msa_msubr_q_w __builtin_msa_msubr_q_w #define __msa_fclass_w __builtin_msa_fclass_w #define __msa_fclass_d __builtin_msa_fclass_d #define __msa_fsqrt_w __builtin_msa_fsqrt_w #define __msa_fsqrt_d __builtin_msa_fsqrt_d #define __msa_frcp_w __builtin_msa_frcp_w #define __msa_frcp_d __builtin_msa_frcp_d #define __msa_frint_w __builtin_msa_frint_w #define __msa_frint_d __builtin_msa_frint_d #define __msa_frsqrt_w __builtin_msa_frsqrt_w #define __msa_frsqrt_d __builtin_msa_frsqrt_d 
#define __msa_flog2_w __builtin_msa_flog2_w #define __msa_flog2_d __builtin_msa_flog2_d #define __msa_fexupl_w __builtin_msa_fexupl_w #define __msa_fexupl_d __builtin_msa_fexupl_d #define __msa_fexupr_w __builtin_msa_fexupr_w #define __msa_fexupr_d __builtin_msa_fexupr_d #define __msa_ffql_w __builtin_msa_ffql_w #define __msa_ffql_d __builtin_msa_ffql_d #define __msa_ffqr_w __builtin_msa_ffqr_w #define __msa_ffqr_d __builtin_msa_ffqr_d #define __msa_ftint_s_w __builtin_msa_ftint_s_w #define __msa_ftint_s_d __builtin_msa_ftint_s_d #define __msa_ftint_u_w __builtin_msa_ftint_u_w #define __msa_ftint_u_d __builtin_msa_ftint_u_d #define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w #define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d #define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w #define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d #define __msa_ffint_s_w __builtin_msa_ffint_s_w #define __msa_ffint_s_d __builtin_msa_ffint_s_d #define __msa_ffint_u_w __builtin_msa_ffint_u_w #define __msa_ffint_u_d __builtin_msa_ffint_u_d #define __msa_cfcmsa __builtin_msa_cfcmsa #define __msa_move_v __builtin_msa_move_v #define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float #define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double #define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float #define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double #endif /* defined(__mips_msa) */ #endif /* _MSA_H */ /*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __MWAITXINTRIN_H #define __MWAITXINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx"))) /// Establishes a linear address memory range to be monitored and puts /// the processor in the monitor event pending state. Data stored in the /// monitored address range causes the processor to exit the pending state. /// /// \headerfile /// /// This intrinsic corresponds to the \c MONITORX instruction. /// /// \param __p /// The memory range to be monitored. The size of the range is determined by /// CPUID function 0000_0005h. /// \param __extensions /// Optional extensions for the monitoring state. /// \param __hints /// Optional hints for the monitoring state. static __inline__ void __DEFAULT_FN_ATTRS _mm_monitorx(void * __p, unsigned __extensions, unsigned __hints) { __builtin_ia32_monitorx(__p, __extensions, __hints); } /// Used with the \c MONITORX instruction to wait while the processor is in /// the monitor event pending state. Data stored in the monitored address /// range, or an interrupt, causes the processor to exit the pending state. /// /// \headerfile /// /// This intrinsic corresponds to the \c MWAITX instruction. /// /// \param __extensions /// Optional extensions for the monitoring state, which can vary by /// processor. /// \param __hints /// Optional hints for the monitoring state, which can vary by processor. static __inline__ void __DEFAULT_FN_ATTRS _mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock) { __builtin_ia32_mwaitx(__extensions, __hints, __clock); } #undef __DEFAULT_FN_ATTRS #endif /* __MWAITXINTRIN_H */ /*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __NMMINTRIN_H #define __NMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif /* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h, just include it now then. */ #include #endif /* __NMMINTRIN_H */ //===----- opencl-c-base.h - OpenCL C language base definitions -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _OPENCL_BASE_H_ #define _OPENCL_BASE_H_ // Define extension macros #if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) // For SPIR and SPIR-V all extensions are supported. 
#if defined(__SPIR__) || defined(__SPIRV__) #define cl_khr_subgroup_extended_types 1 #define cl_khr_subgroup_non_uniform_vote 1 #define cl_khr_subgroup_ballot 1 #define cl_khr_subgroup_non_uniform_arithmetic 1 #define cl_khr_subgroup_shuffle 1 #define cl_khr_subgroup_shuffle_relative 1 #define cl_khr_subgroup_clustered_reduce 1 #define cl_khr_subgroup_rotate 1 #define cl_khr_extended_bit_ops 1 #define cl_khr_integer_dot_product 1 #define __opencl_c_integer_dot_product_input_4x8bit 1 #define __opencl_c_integer_dot_product_input_4x8bit_packed 1 #define cl_ext_float_atomics 1 #ifdef cl_khr_fp16 #define __opencl_c_ext_fp16_global_atomic_load_store 1 #define __opencl_c_ext_fp16_local_atomic_load_store 1 #define __opencl_c_ext_fp16_global_atomic_add 1 #define __opencl_c_ext_fp16_local_atomic_add 1 #define __opencl_c_ext_fp16_global_atomic_min_max 1 #define __opencl_c_ext_fp16_local_atomic_min_max 1 #endif #ifdef cl_khr_fp64 #define __opencl_c_ext_fp64_global_atomic_add 1 #define __opencl_c_ext_fp64_local_atomic_add 1 #define __opencl_c_ext_fp64_global_atomic_min_max 1 #define __opencl_c_ext_fp64_local_atomic_min_max 1 #endif #define __opencl_c_ext_fp32_global_atomic_add 1 #define __opencl_c_ext_fp32_local_atomic_add 1 #define __opencl_c_ext_fp32_global_atomic_min_max 1 #define __opencl_c_ext_fp32_local_atomic_min_max 1 #endif // defined(__SPIR__) || defined(__SPIRV__) #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) // Define feature macros for OpenCL C 2.0 #if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200) #define __opencl_c_pipes 1 #define __opencl_c_generic_address_space 1 #define __opencl_c_work_group_collective_functions 1 #define __opencl_c_atomic_order_acq_rel 1 #define __opencl_c_atomic_order_seq_cst 1 #define __opencl_c_atomic_scope_device 1 #define __opencl_c_atomic_scope_all_devices 1 #define __opencl_c_device_enqueue 1 #define __opencl_c_read_write_images 1 #define __opencl_c_program_scope_global_variables 1 #define 
__opencl_c_images 1 #endif // Define header-only feature macros for OpenCL C 3.0. #if (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) // For the SPIR and SPIR-V target all features are supported. #if defined(__SPIR__) || defined(__SPIRV__) #define __opencl_c_work_group_collective_functions 1 #define __opencl_c_atomic_order_seq_cst 1 #define __opencl_c_atomic_scope_device 1 #define __opencl_c_atomic_scope_all_devices 1 #define __opencl_c_read_write_images 1 #endif // defined(__SPIR__) // Undefine any feature macros that have been explicitly disabled using // an __undef_ macro. #ifdef __undef___opencl_c_work_group_collective_functions #undef __opencl_c_work_group_collective_functions #endif #ifdef __undef___opencl_c_atomic_order_seq_cst #undef __opencl_c_atomic_order_seq_cst #endif #ifdef __undef___opencl_c_atomic_scope_device #undef __opencl_c_atomic_scope_device #endif #ifdef __undef___opencl_c_atomic_scope_all_devices #undef __opencl_c_atomic_scope_all_devices #endif #ifdef __undef___opencl_c_read_write_images #undef __opencl_c_read_write_images #endif #endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) #if !defined(__opencl_c_generic_address_space) // Internal feature macro to provide named (global, local, private) address // space overloads for builtin functions that take a pointer argument. #define __opencl_c_named_address_space_builtins 1 #endif // !defined(__opencl_c_generic_address_space) #if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || defined(__opencl_c_subgroups) // Internal feature macro to provide subgroup builtins. #define __opencl_subgroup_builtins 1 #endif // built-in scalar data types: /** * An unsigned 8-bit integer. */ typedef unsigned char uchar; /** * An unsigned 16-bit integer. */ typedef unsigned short ushort; /** * An unsigned 32-bit integer. */ typedef unsigned int uint; /** * An unsigned 64-bit integer. 
*/ typedef unsigned long ulong; /** * The unsigned integer type of the result of the sizeof operator. This * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if * CL_DEVICE_ADDRESS_BITS is 64-bits. */ typedef __SIZE_TYPE__ size_t; /** * A signed integer type that is the result of subtracting two pointers. * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS * defined in table 4.3 is 32-bits and is a 64-bit signed integer if * CL_DEVICE_ADDRESS_BITS is 64-bits. */ typedef __PTRDIFF_TYPE__ ptrdiff_t; /** * A signed integer type with the property that any valid pointer to * void can be converted to this type, then converted back to pointer * to void, and the result will compare equal to the original pointer. */ typedef __INTPTR_TYPE__ intptr_t; /** * An unsigned integer type with the property that any valid pointer to * void can be converted to this type, then converted back to pointer * to void, and the result will compare equal to the original pointer. 
*/ typedef __UINTPTR_TYPE__ uintptr_t; // built-in vector data types: typedef char char2 __attribute__((ext_vector_type(2))); typedef char char3 __attribute__((ext_vector_type(3))); typedef char char4 __attribute__((ext_vector_type(4))); typedef char char8 __attribute__((ext_vector_type(8))); typedef char char16 __attribute__((ext_vector_type(16))); typedef uchar uchar2 __attribute__((ext_vector_type(2))); typedef uchar uchar3 __attribute__((ext_vector_type(3))); typedef uchar uchar4 __attribute__((ext_vector_type(4))); typedef uchar uchar8 __attribute__((ext_vector_type(8))); typedef uchar uchar16 __attribute__((ext_vector_type(16))); typedef short short2 __attribute__((ext_vector_type(2))); typedef short short3 __attribute__((ext_vector_type(3))); typedef short short4 __attribute__((ext_vector_type(4))); typedef short short8 __attribute__((ext_vector_type(8))); typedef short short16 __attribute__((ext_vector_type(16))); typedef ushort ushort2 __attribute__((ext_vector_type(2))); typedef ushort ushort3 __attribute__((ext_vector_type(3))); typedef ushort ushort4 __attribute__((ext_vector_type(4))); typedef ushort ushort8 __attribute__((ext_vector_type(8))); typedef ushort ushort16 __attribute__((ext_vector_type(16))); typedef int int2 __attribute__((ext_vector_type(2))); typedef int int3 __attribute__((ext_vector_type(3))); typedef int int4 __attribute__((ext_vector_type(4))); typedef int int8 __attribute__((ext_vector_type(8))); typedef int int16 __attribute__((ext_vector_type(16))); typedef uint uint2 __attribute__((ext_vector_type(2))); typedef uint uint3 __attribute__((ext_vector_type(3))); typedef uint uint4 __attribute__((ext_vector_type(4))); typedef uint uint8 __attribute__((ext_vector_type(8))); typedef uint uint16 __attribute__((ext_vector_type(16))); typedef long long2 __attribute__((ext_vector_type(2))); typedef long long3 __attribute__((ext_vector_type(3))); typedef long long4 __attribute__((ext_vector_type(4))); typedef long long8 
__attribute__((ext_vector_type(8))); typedef long long16 __attribute__((ext_vector_type(16))); typedef ulong ulong2 __attribute__((ext_vector_type(2))); typedef ulong ulong3 __attribute__((ext_vector_type(3))); typedef ulong ulong4 __attribute__((ext_vector_type(4))); typedef ulong ulong8 __attribute__((ext_vector_type(8))); typedef ulong ulong16 __attribute__((ext_vector_type(16))); typedef float float2 __attribute__((ext_vector_type(2))); typedef float float3 __attribute__((ext_vector_type(3))); typedef float float4 __attribute__((ext_vector_type(4))); typedef float float8 __attribute__((ext_vector_type(8))); typedef float float16 __attribute__((ext_vector_type(16))); #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable typedef half half2 __attribute__((ext_vector_type(2))); typedef half half3 __attribute__((ext_vector_type(3))); typedef half half4 __attribute__((ext_vector_type(4))); typedef half half8 __attribute__((ext_vector_type(8))); typedef half half16 __attribute__((ext_vector_type(16))); #endif #ifdef cl_khr_fp64 #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif typedef double double2 __attribute__((ext_vector_type(2))); typedef double double3 __attribute__((ext_vector_type(3))); typedef double double4 __attribute__((ext_vector_type(4))); typedef double double8 __attribute__((ext_vector_type(8))); typedef double double16 __attribute__((ext_vector_type(16))); #endif // An internal alias for half, for use by OpenCLBuiltins.td. #define __half half #if defined(__OPENCL_CPP_VERSION__) #define NULL nullptr #elif defined(__OPENCL_C_VERSION__) #define NULL ((void*)0) #endif /** * Value of maximum non-infinite single-precision floating-point * number. */ #define MAXFLOAT 0x1.fffffep127f /** * A positive float constant expression. HUGE_VALF evaluates * to +infinity. Used as an error value returned by the built-in * math functions. 
*/ #define HUGE_VALF (__builtin_huge_valf()) /** * A positive double constant expression. HUGE_VAL evaluates * to +infinity. Used as an error value returned by the built-in * math functions. */ #define HUGE_VAL (__builtin_huge_val()) /** * A constant expression of type float representing positive or * unsigned infinity. */ #define INFINITY (__builtin_inff()) /** * A constant expression of type float representing a quiet NaN. */ #define NAN as_float(INT_MAX) #define FP_ILOGB0 INT_MIN #define FP_ILOGBNAN INT_MAX #define FLT_DIG 6 #define FLT_MANT_DIG 24 #define FLT_MAX_10_EXP +38 #define FLT_MAX_EXP +128 #define FLT_MIN_10_EXP -37 #define FLT_MIN_EXP -125 #define FLT_RADIX 2 #define FLT_MAX 0x1.fffffep127f #define FLT_MIN 0x1.0p-126f #define FLT_EPSILON 0x1.0p-23f #define M_E_F 2.71828182845904523536028747135266250f #define M_LOG2E_F 1.44269504088896340735992468100189214f #define M_LOG10E_F 0.434294481903251827651128918916605082f #define M_LN2_F 0.693147180559945309417232121458176568f #define M_LN10_F 2.30258509299404568401799145468436421f #define M_PI_F 3.14159265358979323846264338327950288f #define M_PI_2_F 1.57079632679489661923132169163975144f #define M_PI_4_F 0.785398163397448309615660845819875721f #define M_1_PI_F 0.318309886183790671537767526745028724f #define M_2_PI_F 0.636619772367581343075535053490057448f #define M_2_SQRTPI_F 1.12837916709551257389615890312154517f #define M_SQRT2_F 1.41421356237309504880168872420969808f #define M_SQRT1_2_F 0.707106781186547524400844362104849039f #define DBL_DIG 15 #define DBL_MANT_DIG 53 #define DBL_MAX_10_EXP +308 #define DBL_MAX_EXP +1024 #define DBL_MIN_10_EXP -307 #define DBL_MIN_EXP -1021 #define DBL_RADIX 2 #define DBL_MAX 0x1.fffffffffffffp1023 #define DBL_MIN 0x1.0p-1022 #define DBL_EPSILON 0x1.0p-52 #define M_E 0x1.5bf0a8b145769p+1 #define M_LOG2E 0x1.71547652b82fep+0 #define M_LOG10E 0x1.bcb7b1526e50ep-2 #define M_LN2 0x1.62e42fefa39efp-1 #define M_LN10 0x1.26bb1bbb55516p+1 #define M_PI 0x1.921fb54442d18p+1 
#define M_PI_2 0x1.921fb54442d18p+0 #define M_PI_4 0x1.921fb54442d18p-1 #define M_1_PI 0x1.45f306dc9c883p-2 #define M_2_PI 0x1.45f306dc9c883p-1 #define M_2_SQRTPI 0x1.20dd750429b6dp+0 #define M_SQRT2 0x1.6a09e667f3bcdp+0 #define M_SQRT1_2 0x1.6a09e667f3bcdp-1 #ifdef cl_khr_fp16 #define HALF_DIG 3 #define HALF_MANT_DIG 11 #define HALF_MAX_10_EXP +4 #define HALF_MAX_EXP +16 #define HALF_MIN_10_EXP -4 #define HALF_MIN_EXP -13 #define HALF_RADIX 2 #define HALF_MAX ((0x1.ffcp15h)) #define HALF_MIN ((0x1.0p-14h)) #define HALF_EPSILON ((0x1.0p-10h)) #define M_E_H 2.71828182845904523536028747135266250h #define M_LOG2E_H 1.44269504088896340735992468100189214h #define M_LOG10E_H 0.434294481903251827651128918916605082h #define M_LN2_H 0.693147180559945309417232121458176568h #define M_LN10_H 2.30258509299404568401799145468436421h #define M_PI_H 3.14159265358979323846264338327950288h #define M_PI_2_H 1.57079632679489661923132169163975144h #define M_PI_4_H 0.785398163397448309615660845819875721h #define M_1_PI_H 0.318309886183790671537767526745028724h #define M_2_PI_H 0.636619772367581343075535053490057448h #define M_2_SQRTPI_H 1.12837916709551257389615890312154517h #define M_SQRT2_H 1.41421356237309504880168872420969808h #define M_SQRT1_2_H 0.707106781186547524400844362104849039h #endif //cl_khr_fp16 #define CHAR_BIT 8 #define SCHAR_MAX 127 #define SCHAR_MIN (-128) #define UCHAR_MAX 255 #define CHAR_MAX SCHAR_MAX #define CHAR_MIN SCHAR_MIN #define USHRT_MAX 65535 #define SHRT_MAX 32767 #define SHRT_MIN (-32768) #define UINT_MAX 0xffffffff #define INT_MAX 2147483647 #define INT_MIN (-2147483647-1) #define ULONG_MAX 0xffffffffffffffffUL #define LONG_MAX 0x7fffffffffffffffL #define LONG_MIN (-0x7fffffffffffffffL-1) // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions // Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence typedef uint cl_mem_fence_flags; /** * Queue a memory fence to ensure correct * ordering of memory operations 
to local memory */ #define CLK_LOCAL_MEM_FENCE 0x01 /** * Queue a memory fence to ensure correct * ordering of memory operations to global memory */ #define CLK_GLOBAL_MEM_FENCE 0x02 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) typedef enum memory_scope { memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, #if defined(__opencl_c_atomic_scope_all_devices) memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) memory_scope_all_devices = memory_scope_all_svm_devices, #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif // defined(__opencl_c_atomic_scope_all_devices) /** * Subgroups have different requirements on forward progress, so just test * all the relevant macros. * CL 3.0 sub-groups "they are not guaranteed to make independent forward progress" * KHR subgroups "Subgroups within a workgroup are independent, make forward progress with respect to each other" */ #if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || defined(__opencl_c_subgroups) memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP #endif } memory_scope; /** * Queue a memory fence to ensure correct ordering of memory * operations between work-items of a work-group to * image memory. 
*/ #define CLK_IMAGE_MEM_FENCE 0x04 #ifndef ATOMIC_VAR_INIT #define ATOMIC_VAR_INIT(x) (x) #endif //ATOMIC_VAR_INIT #define ATOMIC_FLAG_INIT 0 // enum values aligned with what clang uses in EmitAtomicExpr() typedef enum memory_order { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, #if defined(__opencl_c_atomic_order_seq_cst) memory_order_seq_cst = __ATOMIC_SEQ_CST #endif } memory_order; #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions // These values need to match the runtime equivalent // // Addressing Mode. // #define CLK_ADDRESS_NONE 0 #define CLK_ADDRESS_CLAMP_TO_EDGE 2 #define CLK_ADDRESS_CLAMP 4 #define CLK_ADDRESS_REPEAT 6 #define CLK_ADDRESS_MIRRORED_REPEAT 8 // // Coordination Normalization // #define CLK_NORMALIZED_COORDS_FALSE 0 #define CLK_NORMALIZED_COORDS_TRUE 1 // // Filtering Mode. // #define CLK_FILTER_NEAREST 0x10 #define CLK_FILTER_LINEAR 0x20 #ifdef cl_khr_gl_msaa_sharing #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable #endif //cl_khr_gl_msaa_sharing // // Channel Datatype. 
// #define CLK_SNORM_INT8 0x10D0 #define CLK_SNORM_INT16 0x10D1 #define CLK_UNORM_INT8 0x10D2 #define CLK_UNORM_INT16 0x10D3 #define CLK_UNORM_SHORT_565 0x10D4 #define CLK_UNORM_SHORT_555 0x10D5 #define CLK_UNORM_INT_101010 0x10D6 #define CLK_SIGNED_INT8 0x10D7 #define CLK_SIGNED_INT16 0x10D8 #define CLK_SIGNED_INT32 0x10D9 #define CLK_UNSIGNED_INT8 0x10DA #define CLK_UNSIGNED_INT16 0x10DB #define CLK_UNSIGNED_INT32 0x10DC #define CLK_HALF_FLOAT 0x10DD #define CLK_FLOAT 0x10DE #define CLK_UNORM_INT24 0x10DF #if __OPENCL_C_VERSION__ >= CL_VERSION_3_0 #define CLK_UNORM_INT_101010_2 0x10E0 #endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0 // Channel order, numbering must be aligned with cl_channel_order in cl.h // #define CLK_R 0x10B0 #define CLK_A 0x10B1 #define CLK_RG 0x10B2 #define CLK_RA 0x10B3 #define CLK_RGB 0x10B4 #define CLK_RGBA 0x10B5 #define CLK_BGRA 0x10B6 #define CLK_ARGB 0x10B7 #define CLK_INTENSITY 0x10B8 #define CLK_LUMINANCE 0x10B9 #define CLK_Rx 0x10BA #define CLK_RGx 0x10BB #define CLK_RGBx 0x10BC #define CLK_DEPTH 0x10BD #define CLK_DEPTH_STENCIL 0x10BE #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 #define CLK_sRGB 0x10BF #define CLK_sRGBx 0x10C0 #define CLK_sRGBA 0x10C1 #define CLK_sBGRA 0x10C2 #define CLK_ABGR 0x10C3 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 // OpenCL v2.0 s6.13.16 - Pipe Functions #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) // OpenCL v2.0 s6.13.17 - Enqueue Kernels #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 #define CL_QUEUED 0x3 #define CLK_SUCCESS 0 #define CLK_ENQUEUE_FAILURE -101 #define CLK_INVALID_QUEUE -102 #define CLK_INVALID_NDRANGE -160 #define CLK_INVALID_EVENT_WAIT_LIST -57 #define CLK_DEVICE_QUEUE_FULL -161 #define CLK_INVALID_ARG_SIZE -51 #define CLK_EVENT_ALLOCATION_FAILURE -100 #define CLK_OUT_OF_RESOURCES -5 #define CLK_NULL_QUEUE 0 #define CLK_NULL_EVENT 
(__builtin_astype(((__SIZE_MAX__)), clk_event_t)) // execution model related definitions #define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0 #define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1 #define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2 typedef int kernel_enqueue_flags_t; typedef int clk_profiling_info; // Profiling info name (see capture_event_profiling_info) #define CLK_PROFILING_COMMAND_EXEC_TIME 0x1 #define MAX_WORK_DIM 3 #ifdef __opencl_c_device_enqueue typedef struct { unsigned int workDimension; size_t globalWorkOffset[MAX_WORK_DIM]; size_t globalWorkSize[MAX_WORK_DIM]; size_t localWorkSize[MAX_WORK_DIM]; } ndrange_t; #endif // __opencl_c_device_enqueue #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators * Reinterprets a data type as another data type of the same size */ #define as_char(x) __builtin_astype((x), char) #define as_char2(x) __builtin_astype((x), char2) #define as_char3(x) __builtin_astype((x), char3) #define as_char4(x) __builtin_astype((x), char4) #define as_char8(x) __builtin_astype((x), char8) #define as_char16(x) __builtin_astype((x), char16) #define as_uchar(x) __builtin_astype((x), uchar) #define as_uchar2(x) __builtin_astype((x), uchar2) #define as_uchar3(x) __builtin_astype((x), uchar3) #define as_uchar4(x) __builtin_astype((x), uchar4) #define as_uchar8(x) __builtin_astype((x), uchar8) #define as_uchar16(x) __builtin_astype((x), uchar16) #define as_short(x) __builtin_astype((x), short) #define as_short2(x) __builtin_astype((x), short2) #define as_short3(x) __builtin_astype((x), short3) #define as_short4(x) __builtin_astype((x), short4) #define as_short8(x) __builtin_astype((x), short8) #define as_short16(x) __builtin_astype((x), short16) #define as_ushort(x) __builtin_astype((x), ushort) #define as_ushort2(x) __builtin_astype((x), ushort2) #define as_ushort3(x) __builtin_astype((x), ushort3) #define as_ushort4(x) __builtin_astype((x), ushort4) #define as_ushort8(x) 
__builtin_astype((x), ushort8) #define as_ushort16(x) __builtin_astype((x), ushort16) #define as_int(x) __builtin_astype((x), int) #define as_int2(x) __builtin_astype((x), int2) #define as_int3(x) __builtin_astype((x), int3) #define as_int4(x) __builtin_astype((x), int4) #define as_int8(x) __builtin_astype((x), int8) #define as_int16(x) __builtin_astype((x), int16) #define as_uint(x) __builtin_astype((x), uint) #define as_uint2(x) __builtin_astype((x), uint2) #define as_uint3(x) __builtin_astype((x), uint3) #define as_uint4(x) __builtin_astype((x), uint4) #define as_uint8(x) __builtin_astype((x), uint8) #define as_uint16(x) __builtin_astype((x), uint16) #define as_long(x) __builtin_astype((x), long) #define as_long2(x) __builtin_astype((x), long2) #define as_long3(x) __builtin_astype((x), long3) #define as_long4(x) __builtin_astype((x), long4) #define as_long8(x) __builtin_astype((x), long8) #define as_long16(x) __builtin_astype((x), long16) #define as_ulong(x) __builtin_astype((x), ulong) #define as_ulong2(x) __builtin_astype((x), ulong2) #define as_ulong3(x) __builtin_astype((x), ulong3) #define as_ulong4(x) __builtin_astype((x), ulong4) #define as_ulong8(x) __builtin_astype((x), ulong8) #define as_ulong16(x) __builtin_astype((x), ulong16) #define as_float(x) __builtin_astype((x), float) #define as_float2(x) __builtin_astype((x), float2) #define as_float3(x) __builtin_astype((x), float3) #define as_float4(x) __builtin_astype((x), float4) #define as_float8(x) __builtin_astype((x), float8) #define as_float16(x) __builtin_astype((x), float16) #ifdef cl_khr_fp64 #define as_double(x) __builtin_astype((x), double) #define as_double2(x) __builtin_astype((x), double2) #define as_double3(x) __builtin_astype((x), double3) #define as_double4(x) __builtin_astype((x), double4) #define as_double8(x) __builtin_astype((x), double8) #define as_double16(x) __builtin_astype((x), double16) #endif // cl_khr_fp64 #ifdef cl_khr_fp16 #define as_half(x) __builtin_astype((x), half) 
#define as_half2(x) __builtin_astype((x), half2) #define as_half3(x) __builtin_astype((x), half3) #define as_half4(x) __builtin_astype((x), half4) #define as_half8(x) __builtin_astype((x), half8) #define as_half16(x) __builtin_astype((x), half16) #endif // cl_khr_fp16 #define as_size_t(x) __builtin_astype((x), size_t) #define as_ptrdiff_t(x) __builtin_astype((x), ptrdiff_t) #define as_intptr_t(x) __builtin_astype((x), intptr_t) #define as_uintptr_t(x) __builtin_astype((x), uintptr_t) // C++ for OpenCL - __remove_address_space #if defined(__OPENCL_CPP_VERSION__) template struct __remove_address_space { using type = _Tp; }; #if defined(__opencl_c_generic_address_space) template struct __remove_address_space<__generic _Tp> { using type = _Tp; }; #endif template struct __remove_address_space<__global _Tp> { using type = _Tp; }; template struct __remove_address_space<__private _Tp> { using type = _Tp; }; template struct __remove_address_space<__local _Tp> { using type = _Tp; }; template struct __remove_address_space<__constant _Tp> { using type = _Tp; }; #endif // OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers #define __kernel_exec(X, typen) __kernel \ __attribute__((work_group_size_hint(X, 1, 1))) \ __attribute__((vec_type_hint(typen))) #define kernel_exec(X, typen) __kernel \ __attribute__((work_group_size_hint(X, 1, 1))) \ __attribute__((vec_type_hint(typen))) #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf int printf(__constant const char* st, ...) 
__attribute__((format(printf, 1, 2))); #endif #ifdef cl_intel_device_side_avc_motion_estimation #define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0 #define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1 #define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2 #define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3 #define CLK_AVC_ME_MINOR_8x8_INTEL 0x0 #define CLK_AVC_ME_MINOR_8x4_INTEL 0x1 #define CLK_AVC_ME_MINOR_4x8_INTEL 0x2 #define CLK_AVC_ME_MINOR_4x4_INTEL 0x3 #define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0 #define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 #define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 #define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 #define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E #define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D #define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B #define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 #define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F #define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F #define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F #define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 #define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 #define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 #define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 #define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 #define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 #define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 #define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 #define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 #define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 #define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 #define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 #define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 #define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 #define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 #define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 #define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 #define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 #define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 #define 
CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 #define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 #define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 #define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B #define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 #define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 #define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 #define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 #define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 #define CLK_AVC_ME_INTRA_16x16_INTEL 0x0 #define CLK_AVC_ME_INTRA_8x8_INTEL 0x1 #define CLK_AVC_ME_INTRA_4x4_INTEL 0x2 #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 #define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24) #define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24) #define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24) #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26) #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26) #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28) #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28) #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30) #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30) #define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 #define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 #define 
CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 #define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1 #define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2 #define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3 #define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 #define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 #define CLK_AVC_ME_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0 #define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0 #define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0 #define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0 #define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 #define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 #endif // 
cl_intel_device_side_avc_motion_estimation // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable #endif //_OPENCL_BASE_H_ //===--- opencl-c.h - OpenCL C language builtin function header -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _OPENCL_H_ #define _OPENCL_H_ #include "opencl-c-base.h" #if defined(__opencl_c_images) #ifndef cl_khr_depth_images #define cl_khr_depth_images #endif //cl_khr_depth_images #endif //defined(__opencl_c_images) #if __OPENCL_C_VERSION__ < CL_VERSION_2_0 #ifdef cl_khr_3d_image_writes #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #endif //cl_khr_3d_image_writes #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0 #if (defined(__OPENCL_CPP_VERSION__) || \ (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && \ (defined(__SPIR__) || defined(__SPIRV__)) #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin #pragma OPENCL EXTENSION cl_intel_planar_yuv : end #endif // (defined(__OPENCL_CPP_VERSION__) || // (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && // (defined(__SPIR__) || defined(__SPIRV__)) #define __ovld __attribute__((overloadable)) #define __conv __attribute__((convergent)) // Optimizations #define __purefn __attribute__((pure)) #define __cnfn __attribute__((const)) // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions char __ovld __cnfn convert_char_rte(char); char __ovld __cnfn convert_char_sat_rte(char); char __ovld __cnfn convert_char_rtz(char); char __ovld __cnfn convert_char_sat_rtz(char); char __ovld __cnfn convert_char_rtp(char); char __ovld __cnfn convert_char_sat_rtp(char); char __ovld __cnfn convert_char_rtn(char); char __ovld __cnfn convert_char_sat_rtn(char); char __ovld __cnfn convert_char(char); char __ovld __cnfn 
convert_char_sat(char); char __ovld __cnfn convert_char_rte(uchar); char __ovld __cnfn convert_char_sat_rte(uchar); char __ovld __cnfn convert_char_rtz(uchar); char __ovld __cnfn convert_char_sat_rtz(uchar); char __ovld __cnfn convert_char_rtp(uchar); char __ovld __cnfn convert_char_sat_rtp(uchar); char __ovld __cnfn convert_char_rtn(uchar); char __ovld __cnfn convert_char_sat_rtn(uchar); char __ovld __cnfn convert_char(uchar); char __ovld __cnfn convert_char_sat(uchar); char __ovld __cnfn convert_char_rte(short); char __ovld __cnfn convert_char_sat_rte(short); char __ovld __cnfn convert_char_rtz(short); char __ovld __cnfn convert_char_sat_rtz(short); char __ovld __cnfn convert_char_rtp(short); char __ovld __cnfn convert_char_sat_rtp(short); char __ovld __cnfn convert_char_rtn(short); char __ovld __cnfn convert_char_sat_rtn(short); char __ovld __cnfn convert_char(short); char __ovld __cnfn convert_char_sat(short); char __ovld __cnfn convert_char_rte(ushort); char __ovld __cnfn convert_char_sat_rte(ushort); char __ovld __cnfn convert_char_rtz(ushort); char __ovld __cnfn convert_char_sat_rtz(ushort); char __ovld __cnfn convert_char_rtp(ushort); char __ovld __cnfn convert_char_sat_rtp(ushort); char __ovld __cnfn convert_char_rtn(ushort); char __ovld __cnfn convert_char_sat_rtn(ushort); char __ovld __cnfn convert_char(ushort); char __ovld __cnfn convert_char_sat(ushort); char __ovld __cnfn convert_char_rte(int); char __ovld __cnfn convert_char_sat_rte(int); char __ovld __cnfn convert_char_rtz(int); char __ovld __cnfn convert_char_sat_rtz(int); char __ovld __cnfn convert_char_rtp(int); char __ovld __cnfn convert_char_sat_rtp(int); char __ovld __cnfn convert_char_rtn(int); char __ovld __cnfn convert_char_sat_rtn(int); char __ovld __cnfn convert_char(int); char __ovld __cnfn convert_char_sat(int); char __ovld __cnfn convert_char_rte(uint); char __ovld __cnfn convert_char_sat_rte(uint); char __ovld __cnfn convert_char_rtz(uint); char __ovld __cnfn 
convert_char_sat_rtz(uint); char __ovld __cnfn convert_char_rtp(uint); char __ovld __cnfn convert_char_sat_rtp(uint); char __ovld __cnfn convert_char_rtn(uint); char __ovld __cnfn convert_char_sat_rtn(uint); char __ovld __cnfn convert_char(uint); char __ovld __cnfn convert_char_sat(uint); char __ovld __cnfn convert_char_rte(long); char __ovld __cnfn convert_char_sat_rte(long); char __ovld __cnfn convert_char_rtz(long); char __ovld __cnfn convert_char_sat_rtz(long); char __ovld __cnfn convert_char_rtp(long); char __ovld __cnfn convert_char_sat_rtp(long); char __ovld __cnfn convert_char_rtn(long); char __ovld __cnfn convert_char_sat_rtn(long); char __ovld __cnfn convert_char(long); char __ovld __cnfn convert_char_sat(long); char __ovld __cnfn convert_char_rte(ulong); char __ovld __cnfn convert_char_sat_rte(ulong); char __ovld __cnfn convert_char_rtz(ulong); char __ovld __cnfn convert_char_sat_rtz(ulong); char __ovld __cnfn convert_char_rtp(ulong); char __ovld __cnfn convert_char_sat_rtp(ulong); char __ovld __cnfn convert_char_rtn(ulong); char __ovld __cnfn convert_char_sat_rtn(ulong); char __ovld __cnfn convert_char(ulong); char __ovld __cnfn convert_char_sat(ulong); char __ovld __cnfn convert_char_rte(float); char __ovld __cnfn convert_char_sat_rte(float); char __ovld __cnfn convert_char_rtz(float); char __ovld __cnfn convert_char_sat_rtz(float); char __ovld __cnfn convert_char_rtp(float); char __ovld __cnfn convert_char_sat_rtp(float); char __ovld __cnfn convert_char_rtn(float); char __ovld __cnfn convert_char_sat_rtn(float); char __ovld __cnfn convert_char(float); char __ovld __cnfn convert_char_sat(float); uchar __ovld __cnfn convert_uchar_rte(char); uchar __ovld __cnfn convert_uchar_sat_rte(char); uchar __ovld __cnfn convert_uchar_rtz(char); uchar __ovld __cnfn convert_uchar_sat_rtz(char); uchar __ovld __cnfn convert_uchar_rtp(char); uchar __ovld __cnfn convert_uchar_sat_rtp(char); uchar __ovld __cnfn convert_uchar_rtn(char); uchar __ovld __cnfn 
convert_uchar_sat_rtn(char); uchar __ovld __cnfn convert_uchar(char); uchar __ovld __cnfn convert_uchar_sat(char); uchar __ovld __cnfn convert_uchar_rte(uchar); uchar __ovld __cnfn convert_uchar_sat_rte(uchar); uchar __ovld __cnfn convert_uchar_rtz(uchar); uchar __ovld __cnfn convert_uchar_sat_rtz(uchar); uchar __ovld __cnfn convert_uchar_rtp(uchar); uchar __ovld __cnfn convert_uchar_sat_rtp(uchar); uchar __ovld __cnfn convert_uchar_rtn(uchar); uchar __ovld __cnfn convert_uchar_sat_rtn(uchar); uchar __ovld __cnfn convert_uchar(uchar); uchar __ovld __cnfn convert_uchar_sat(uchar); uchar __ovld __cnfn convert_uchar_rte(short); uchar __ovld __cnfn convert_uchar_sat_rte(short); uchar __ovld __cnfn convert_uchar_rtz(short); uchar __ovld __cnfn convert_uchar_sat_rtz(short); uchar __ovld __cnfn convert_uchar_rtp(short); uchar __ovld __cnfn convert_uchar_sat_rtp(short); uchar __ovld __cnfn convert_uchar_rtn(short); uchar __ovld __cnfn convert_uchar_sat_rtn(short); uchar __ovld __cnfn convert_uchar(short); uchar __ovld __cnfn convert_uchar_sat(short); uchar __ovld __cnfn convert_uchar_rte(ushort); uchar __ovld __cnfn convert_uchar_sat_rte(ushort); uchar __ovld __cnfn convert_uchar_rtz(ushort); uchar __ovld __cnfn convert_uchar_sat_rtz(ushort); uchar __ovld __cnfn convert_uchar_rtp(ushort); uchar __ovld __cnfn convert_uchar_sat_rtp(ushort); uchar __ovld __cnfn convert_uchar_rtn(ushort); uchar __ovld __cnfn convert_uchar_sat_rtn(ushort); uchar __ovld __cnfn convert_uchar(ushort); uchar __ovld __cnfn convert_uchar_sat(ushort); uchar __ovld __cnfn convert_uchar_rte(int); uchar __ovld __cnfn convert_uchar_sat_rte(int); uchar __ovld __cnfn convert_uchar_rtz(int); uchar __ovld __cnfn convert_uchar_sat_rtz(int); uchar __ovld __cnfn convert_uchar_rtp(int); uchar __ovld __cnfn convert_uchar_sat_rtp(int); uchar __ovld __cnfn convert_uchar_rtn(int); uchar __ovld __cnfn convert_uchar_sat_rtn(int); uchar __ovld __cnfn convert_uchar(int); uchar __ovld __cnfn convert_uchar_sat(int); uchar 
__ovld __cnfn convert_uchar_rte(uint); uchar __ovld __cnfn convert_uchar_sat_rte(uint); uchar __ovld __cnfn convert_uchar_rtz(uint); uchar __ovld __cnfn convert_uchar_sat_rtz(uint); uchar __ovld __cnfn convert_uchar_rtp(uint); uchar __ovld __cnfn convert_uchar_sat_rtp(uint); uchar __ovld __cnfn convert_uchar_rtn(uint); uchar __ovld __cnfn convert_uchar_sat_rtn(uint); uchar __ovld __cnfn convert_uchar(uint); uchar __ovld __cnfn convert_uchar_sat(uint); uchar __ovld __cnfn convert_uchar_rte(long); uchar __ovld __cnfn convert_uchar_sat_rte(long); uchar __ovld __cnfn convert_uchar_rtz(long); uchar __ovld __cnfn convert_uchar_sat_rtz(long); uchar __ovld __cnfn convert_uchar_rtp(long); uchar __ovld __cnfn convert_uchar_sat_rtp(long); uchar __ovld __cnfn convert_uchar_rtn(long); uchar __ovld __cnfn convert_uchar_sat_rtn(long); uchar __ovld __cnfn convert_uchar(long); uchar __ovld __cnfn convert_uchar_sat(long); uchar __ovld __cnfn convert_uchar_rte(ulong); uchar __ovld __cnfn convert_uchar_sat_rte(ulong); uchar __ovld __cnfn convert_uchar_rtz(ulong); uchar __ovld __cnfn convert_uchar_sat_rtz(ulong); uchar __ovld __cnfn convert_uchar_rtp(ulong); uchar __ovld __cnfn convert_uchar_sat_rtp(ulong); uchar __ovld __cnfn convert_uchar_rtn(ulong); uchar __ovld __cnfn convert_uchar_sat_rtn(ulong); uchar __ovld __cnfn convert_uchar(ulong); uchar __ovld __cnfn convert_uchar_sat(ulong); uchar __ovld __cnfn convert_uchar_rte(float); uchar __ovld __cnfn convert_uchar_sat_rte(float); uchar __ovld __cnfn convert_uchar_rtz(float); uchar __ovld __cnfn convert_uchar_sat_rtz(float); uchar __ovld __cnfn convert_uchar_rtp(float); uchar __ovld __cnfn convert_uchar_sat_rtp(float); uchar __ovld __cnfn convert_uchar_rtn(float); uchar __ovld __cnfn convert_uchar_sat_rtn(float); uchar __ovld __cnfn convert_uchar(float); uchar __ovld __cnfn convert_uchar_sat(float); short __ovld __cnfn convert_short_rte(char); short __ovld __cnfn convert_short_sat_rte(char); short __ovld __cnfn 
convert_short_rtz(char); short __ovld __cnfn convert_short_sat_rtz(char); short __ovld __cnfn convert_short_rtp(char); short __ovld __cnfn convert_short_sat_rtp(char); short __ovld __cnfn convert_short_rtn(char); short __ovld __cnfn convert_short_sat_rtn(char); short __ovld __cnfn convert_short(char); short __ovld __cnfn convert_short_sat(char); short __ovld __cnfn convert_short_rte(uchar); short __ovld __cnfn convert_short_sat_rte(uchar); short __ovld __cnfn convert_short_rtz(uchar); short __ovld __cnfn convert_short_sat_rtz(uchar); short __ovld __cnfn convert_short_rtp(uchar); short __ovld __cnfn convert_short_sat_rtp(uchar); short __ovld __cnfn convert_short_rtn(uchar); short __ovld __cnfn convert_short_sat_rtn(uchar); short __ovld __cnfn convert_short(uchar); short __ovld __cnfn convert_short_sat(uchar); short __ovld __cnfn convert_short_rte(short); short __ovld __cnfn convert_short_sat_rte(short); short __ovld __cnfn convert_short_rtz(short); short __ovld __cnfn convert_short_sat_rtz(short); short __ovld __cnfn convert_short_rtp(short); short __ovld __cnfn convert_short_sat_rtp(short); short __ovld __cnfn convert_short_rtn(short); short __ovld __cnfn convert_short_sat_rtn(short); short __ovld __cnfn convert_short(short); short __ovld __cnfn convert_short_sat(short); short __ovld __cnfn convert_short_rte(ushort); short __ovld __cnfn convert_short_sat_rte(ushort); short __ovld __cnfn convert_short_rtz(ushort); short __ovld __cnfn convert_short_sat_rtz(ushort); short __ovld __cnfn convert_short_rtp(ushort); short __ovld __cnfn convert_short_sat_rtp(ushort); short __ovld __cnfn convert_short_rtn(ushort); short __ovld __cnfn convert_short_sat_rtn(ushort); short __ovld __cnfn convert_short(ushort); short __ovld __cnfn convert_short_sat(ushort); short __ovld __cnfn convert_short_rte(int); short __ovld __cnfn convert_short_sat_rte(int); short __ovld __cnfn convert_short_rtz(int); short __ovld __cnfn convert_short_sat_rtz(int); short __ovld __cnfn 
convert_short_rtp(int); short __ovld __cnfn convert_short_sat_rtp(int); short __ovld __cnfn convert_short_rtn(int); short __ovld __cnfn convert_short_sat_rtn(int); short __ovld __cnfn convert_short(int); short __ovld __cnfn convert_short_sat(int); short __ovld __cnfn convert_short_rte(uint); short __ovld __cnfn convert_short_sat_rte(uint); short __ovld __cnfn convert_short_rtz(uint); short __ovld __cnfn convert_short_sat_rtz(uint); short __ovld __cnfn convert_short_rtp(uint); short __ovld __cnfn convert_short_sat_rtp(uint); short __ovld __cnfn convert_short_rtn(uint); short __ovld __cnfn convert_short_sat_rtn(uint); short __ovld __cnfn convert_short(uint); short __ovld __cnfn convert_short_sat(uint); short __ovld __cnfn convert_short_rte(long); short __ovld __cnfn convert_short_sat_rte(long); short __ovld __cnfn convert_short_rtz(long); short __ovld __cnfn convert_short_sat_rtz(long); short __ovld __cnfn convert_short_rtp(long); short __ovld __cnfn convert_short_sat_rtp(long); short __ovld __cnfn convert_short_rtn(long); short __ovld __cnfn convert_short_sat_rtn(long); short __ovld __cnfn convert_short(long); short __ovld __cnfn convert_short_sat(long); short __ovld __cnfn convert_short_rte(ulong); short __ovld __cnfn convert_short_sat_rte(ulong); short __ovld __cnfn convert_short_rtz(ulong); short __ovld __cnfn convert_short_sat_rtz(ulong); short __ovld __cnfn convert_short_rtp(ulong); short __ovld __cnfn convert_short_sat_rtp(ulong); short __ovld __cnfn convert_short_rtn(ulong); short __ovld __cnfn convert_short_sat_rtn(ulong); short __ovld __cnfn convert_short(ulong); short __ovld __cnfn convert_short_sat(ulong); short __ovld __cnfn convert_short_rte(float); short __ovld __cnfn convert_short_sat_rte(float); short __ovld __cnfn convert_short_rtz(float); short __ovld __cnfn convert_short_sat_rtz(float); short __ovld __cnfn convert_short_rtp(float); short __ovld __cnfn convert_short_sat_rtp(float); short __ovld __cnfn convert_short_rtn(float); short __ovld __cnfn 
convert_short_sat_rtn(float); short __ovld __cnfn convert_short(float); short __ovld __cnfn convert_short_sat(float); ushort __ovld __cnfn convert_ushort_rte(char); ushort __ovld __cnfn convert_ushort_sat_rte(char); ushort __ovld __cnfn convert_ushort_rtz(char); ushort __ovld __cnfn convert_ushort_sat_rtz(char); ushort __ovld __cnfn convert_ushort_rtp(char); ushort __ovld __cnfn convert_ushort_sat_rtp(char); ushort __ovld __cnfn convert_ushort_rtn(char); ushort __ovld __cnfn convert_ushort_sat_rtn(char); ushort __ovld __cnfn convert_ushort(char); ushort __ovld __cnfn convert_ushort_sat(char); ushort __ovld __cnfn convert_ushort_rte(uchar); ushort __ovld __cnfn convert_ushort_sat_rte(uchar); ushort __ovld __cnfn convert_ushort_rtz(uchar); ushort __ovld __cnfn convert_ushort_sat_rtz(uchar); ushort __ovld __cnfn convert_ushort_rtp(uchar); ushort __ovld __cnfn convert_ushort_sat_rtp(uchar); ushort __ovld __cnfn convert_ushort_rtn(uchar); ushort __ovld __cnfn convert_ushort_sat_rtn(uchar); ushort __ovld __cnfn convert_ushort(uchar); ushort __ovld __cnfn convert_ushort_sat(uchar); ushort __ovld __cnfn convert_ushort_rte(short); ushort __ovld __cnfn convert_ushort_sat_rte(short); ushort __ovld __cnfn convert_ushort_rtz(short); ushort __ovld __cnfn convert_ushort_sat_rtz(short); ushort __ovld __cnfn convert_ushort_rtp(short); ushort __ovld __cnfn convert_ushort_sat_rtp(short); ushort __ovld __cnfn convert_ushort_rtn(short); ushort __ovld __cnfn convert_ushort_sat_rtn(short); ushort __ovld __cnfn convert_ushort(short); ushort __ovld __cnfn convert_ushort_sat(short); ushort __ovld __cnfn convert_ushort_rte(ushort); ushort __ovld __cnfn convert_ushort_sat_rte(ushort); ushort __ovld __cnfn convert_ushort_rtz(ushort); ushort __ovld __cnfn convert_ushort_sat_rtz(ushort); ushort __ovld __cnfn convert_ushort_rtp(ushort); ushort __ovld __cnfn convert_ushort_sat_rtp(ushort); ushort __ovld __cnfn convert_ushort_rtn(ushort); ushort __ovld __cnfn convert_ushort_sat_rtn(ushort); ushort 
__ovld __cnfn convert_ushort(ushort); ushort __ovld __cnfn convert_ushort_sat(ushort); ushort __ovld __cnfn convert_ushort_rte(int); ushort __ovld __cnfn convert_ushort_sat_rte(int); ushort __ovld __cnfn convert_ushort_rtz(int); ushort __ovld __cnfn convert_ushort_sat_rtz(int); ushort __ovld __cnfn convert_ushort_rtp(int); ushort __ovld __cnfn convert_ushort_sat_rtp(int); ushort __ovld __cnfn convert_ushort_rtn(int); ushort __ovld __cnfn convert_ushort_sat_rtn(int); ushort __ovld __cnfn convert_ushort(int); ushort __ovld __cnfn convert_ushort_sat(int); ushort __ovld __cnfn convert_ushort_rte(uint); ushort __ovld __cnfn convert_ushort_sat_rte(uint); ushort __ovld __cnfn convert_ushort_rtz(uint); ushort __ovld __cnfn convert_ushort_sat_rtz(uint); ushort __ovld __cnfn convert_ushort_rtp(uint); ushort __ovld __cnfn convert_ushort_sat_rtp(uint); ushort __ovld __cnfn convert_ushort_rtn(uint); ushort __ovld __cnfn convert_ushort_sat_rtn(uint); ushort __ovld __cnfn convert_ushort(uint); ushort __ovld __cnfn convert_ushort_sat(uint); ushort __ovld __cnfn convert_ushort_rte(long); ushort __ovld __cnfn convert_ushort_sat_rte(long); ushort __ovld __cnfn convert_ushort_rtz(long); ushort __ovld __cnfn convert_ushort_sat_rtz(long); ushort __ovld __cnfn convert_ushort_rtp(long); ushort __ovld __cnfn convert_ushort_sat_rtp(long); ushort __ovld __cnfn convert_ushort_rtn(long); ushort __ovld __cnfn convert_ushort_sat_rtn(long); ushort __ovld __cnfn convert_ushort(long); ushort __ovld __cnfn convert_ushort_sat(long); ushort __ovld __cnfn convert_ushort_rte(ulong); ushort __ovld __cnfn convert_ushort_sat_rte(ulong); ushort __ovld __cnfn convert_ushort_rtz(ulong); ushort __ovld __cnfn convert_ushort_sat_rtz(ulong); ushort __ovld __cnfn convert_ushort_rtp(ulong); ushort __ovld __cnfn convert_ushort_sat_rtp(ulong); ushort __ovld __cnfn convert_ushort_rtn(ulong); ushort __ovld __cnfn convert_ushort_sat_rtn(ulong); ushort __ovld __cnfn convert_ushort(ulong); ushort __ovld __cnfn 
convert_ushort_sat(ulong); ushort __ovld __cnfn convert_ushort_rte(float); ushort __ovld __cnfn convert_ushort_sat_rte(float); ushort __ovld __cnfn convert_ushort_rtz(float); ushort __ovld __cnfn convert_ushort_sat_rtz(float); ushort __ovld __cnfn convert_ushort_rtp(float); ushort __ovld __cnfn convert_ushort_sat_rtp(float); ushort __ovld __cnfn convert_ushort_rtn(float); ushort __ovld __cnfn convert_ushort_sat_rtn(float); ushort __ovld __cnfn convert_ushort(float); ushort __ovld __cnfn convert_ushort_sat(float); int __ovld __cnfn convert_int_rte(char); int __ovld __cnfn convert_int_sat_rte(char); int __ovld __cnfn convert_int_rtz(char); int __ovld __cnfn convert_int_sat_rtz(char); int __ovld __cnfn convert_int_rtp(char); int __ovld __cnfn convert_int_sat_rtp(char); int __ovld __cnfn convert_int_rtn(char); int __ovld __cnfn convert_int_sat_rtn(char); int __ovld __cnfn convert_int(char); int __ovld __cnfn convert_int_sat(char); int __ovld __cnfn convert_int_rte(uchar); int __ovld __cnfn convert_int_sat_rte(uchar); int __ovld __cnfn convert_int_rtz(uchar); int __ovld __cnfn convert_int_sat_rtz(uchar); int __ovld __cnfn convert_int_rtp(uchar); int __ovld __cnfn convert_int_sat_rtp(uchar); int __ovld __cnfn convert_int_rtn(uchar); int __ovld __cnfn convert_int_sat_rtn(uchar); int __ovld __cnfn convert_int(uchar); int __ovld __cnfn convert_int_sat(uchar); int __ovld __cnfn convert_int_rte(short); int __ovld __cnfn convert_int_sat_rte(short); int __ovld __cnfn convert_int_rtz(short); int __ovld __cnfn convert_int_sat_rtz(short); int __ovld __cnfn convert_int_rtp(short); int __ovld __cnfn convert_int_sat_rtp(short); int __ovld __cnfn convert_int_rtn(short); int __ovld __cnfn convert_int_sat_rtn(short); int __ovld __cnfn convert_int(short); int __ovld __cnfn convert_int_sat(short); int __ovld __cnfn convert_int_rte(ushort); int __ovld __cnfn convert_int_sat_rte(ushort); int __ovld __cnfn convert_int_rtz(ushort); int __ovld __cnfn convert_int_sat_rtz(ushort); int __ovld 
__cnfn convert_int_rtp(ushort); int __ovld __cnfn convert_int_sat_rtp(ushort); int __ovld __cnfn convert_int_rtn(ushort); int __ovld __cnfn convert_int_sat_rtn(ushort); int __ovld __cnfn convert_int(ushort); int __ovld __cnfn convert_int_sat(ushort); int __ovld __cnfn convert_int_rte(int); int __ovld __cnfn convert_int_sat_rte(int); int __ovld __cnfn convert_int_rtz(int); int __ovld __cnfn convert_int_sat_rtz(int); int __ovld __cnfn convert_int_rtp(int); int __ovld __cnfn convert_int_sat_rtp(int); int __ovld __cnfn convert_int_rtn(int); int __ovld __cnfn convert_int_sat_rtn(int); int __ovld __cnfn convert_int(int); int __ovld __cnfn convert_int_sat(int); int __ovld __cnfn convert_int_rte(uint); int __ovld __cnfn convert_int_sat_rte(uint); int __ovld __cnfn convert_int_rtz(uint); int __ovld __cnfn convert_int_sat_rtz(uint); int __ovld __cnfn convert_int_rtp(uint); int __ovld __cnfn convert_int_sat_rtp(uint); int __ovld __cnfn convert_int_rtn(uint); int __ovld __cnfn convert_int_sat_rtn(uint); int __ovld __cnfn convert_int(uint); int __ovld __cnfn convert_int_sat(uint); int __ovld __cnfn convert_int_rte(long); int __ovld __cnfn convert_int_sat_rte(long); int __ovld __cnfn convert_int_rtz(long); int __ovld __cnfn convert_int_sat_rtz(long); int __ovld __cnfn convert_int_rtp(long); int __ovld __cnfn convert_int_sat_rtp(long); int __ovld __cnfn convert_int_rtn(long); int __ovld __cnfn convert_int_sat_rtn(long); int __ovld __cnfn convert_int(long); int __ovld __cnfn convert_int_sat(long); int __ovld __cnfn convert_int_rte(ulong); int __ovld __cnfn convert_int_sat_rte(ulong); int __ovld __cnfn convert_int_rtz(ulong); int __ovld __cnfn convert_int_sat_rtz(ulong); int __ovld __cnfn convert_int_rtp(ulong); int __ovld __cnfn convert_int_sat_rtp(ulong); int __ovld __cnfn convert_int_rtn(ulong); int __ovld __cnfn convert_int_sat_rtn(ulong); int __ovld __cnfn convert_int(ulong); int __ovld __cnfn convert_int_sat(ulong); int __ovld __cnfn convert_int_rte(float); int __ovld __cnfn 
convert_int_sat_rte(float); int __ovld __cnfn convert_int_rtz(float); int __ovld __cnfn convert_int_sat_rtz(float); int __ovld __cnfn convert_int_rtp(float); int __ovld __cnfn convert_int_sat_rtp(float); int __ovld __cnfn convert_int_rtn(float); int __ovld __cnfn convert_int_sat_rtn(float); int __ovld __cnfn convert_int(float); int __ovld __cnfn convert_int_sat(float); uint __ovld __cnfn convert_uint_rte(char); uint __ovld __cnfn convert_uint_sat_rte(char); uint __ovld __cnfn convert_uint_rtz(char); uint __ovld __cnfn convert_uint_sat_rtz(char); uint __ovld __cnfn convert_uint_rtp(char); uint __ovld __cnfn convert_uint_sat_rtp(char); uint __ovld __cnfn convert_uint_rtn(char); uint __ovld __cnfn convert_uint_sat_rtn(char); uint __ovld __cnfn convert_uint(char); uint __ovld __cnfn convert_uint_sat(char); uint __ovld __cnfn convert_uint_rte(uchar); uint __ovld __cnfn convert_uint_sat_rte(uchar); uint __ovld __cnfn convert_uint_rtz(uchar); uint __ovld __cnfn convert_uint_sat_rtz(uchar); uint __ovld __cnfn convert_uint_rtp(uchar); uint __ovld __cnfn convert_uint_sat_rtp(uchar); uint __ovld __cnfn convert_uint_rtn(uchar); uint __ovld __cnfn convert_uint_sat_rtn(uchar); uint __ovld __cnfn convert_uint(uchar); uint __ovld __cnfn convert_uint_sat(uchar); uint __ovld __cnfn convert_uint_rte(short); uint __ovld __cnfn convert_uint_sat_rte(short); uint __ovld __cnfn convert_uint_rtz(short); uint __ovld __cnfn convert_uint_sat_rtz(short); uint __ovld __cnfn convert_uint_rtp(short); uint __ovld __cnfn convert_uint_sat_rtp(short); uint __ovld __cnfn convert_uint_rtn(short); uint __ovld __cnfn convert_uint_sat_rtn(short); uint __ovld __cnfn convert_uint(short); uint __ovld __cnfn convert_uint_sat(short); uint __ovld __cnfn convert_uint_rte(ushort); uint __ovld __cnfn convert_uint_sat_rte(ushort); uint __ovld __cnfn convert_uint_rtz(ushort); uint __ovld __cnfn convert_uint_sat_rtz(ushort); uint __ovld __cnfn convert_uint_rtp(ushort); uint __ovld __cnfn convert_uint_sat_rtp(ushort); 
uint __ovld __cnfn convert_uint_rtn(ushort); uint __ovld __cnfn convert_uint_sat_rtn(ushort); uint __ovld __cnfn convert_uint(ushort); uint __ovld __cnfn convert_uint_sat(ushort); uint __ovld __cnfn convert_uint_rte(int); uint __ovld __cnfn convert_uint_sat_rte(int); uint __ovld __cnfn convert_uint_rtz(int); uint __ovld __cnfn convert_uint_sat_rtz(int); uint __ovld __cnfn convert_uint_rtp(int); uint __ovld __cnfn convert_uint_sat_rtp(int); uint __ovld __cnfn convert_uint_rtn(int); uint __ovld __cnfn convert_uint_sat_rtn(int); uint __ovld __cnfn convert_uint(int); uint __ovld __cnfn convert_uint_sat(int); uint __ovld __cnfn convert_uint_rte(uint); uint __ovld __cnfn convert_uint_sat_rte(uint); uint __ovld __cnfn convert_uint_rtz(uint); uint __ovld __cnfn convert_uint_sat_rtz(uint); uint __ovld __cnfn convert_uint_rtp(uint); uint __ovld __cnfn convert_uint_sat_rtp(uint); uint __ovld __cnfn convert_uint_rtn(uint); uint __ovld __cnfn convert_uint_sat_rtn(uint); uint __ovld __cnfn convert_uint(uint); uint __ovld __cnfn convert_uint_sat(uint); uint __ovld __cnfn convert_uint_rte(long); uint __ovld __cnfn convert_uint_sat_rte(long); uint __ovld __cnfn convert_uint_rtz(long); uint __ovld __cnfn convert_uint_sat_rtz(long); uint __ovld __cnfn convert_uint_rtp(long); uint __ovld __cnfn convert_uint_sat_rtp(long); uint __ovld __cnfn convert_uint_rtn(long); uint __ovld __cnfn convert_uint_sat_rtn(long); uint __ovld __cnfn convert_uint(long); uint __ovld __cnfn convert_uint_sat(long); uint __ovld __cnfn convert_uint_rte(ulong); uint __ovld __cnfn convert_uint_sat_rte(ulong); uint __ovld __cnfn convert_uint_rtz(ulong); uint __ovld __cnfn convert_uint_sat_rtz(ulong); uint __ovld __cnfn convert_uint_rtp(ulong); uint __ovld __cnfn convert_uint_sat_rtp(ulong); uint __ovld __cnfn convert_uint_rtn(ulong); uint __ovld __cnfn convert_uint_sat_rtn(ulong); uint __ovld __cnfn convert_uint(ulong); uint __ovld __cnfn convert_uint_sat(ulong); uint __ovld __cnfn convert_uint_rte(float); uint 
__ovld __cnfn convert_uint_sat_rte(float); uint __ovld __cnfn convert_uint_rtz(float); uint __ovld __cnfn convert_uint_sat_rtz(float); uint __ovld __cnfn convert_uint_rtp(float); uint __ovld __cnfn convert_uint_sat_rtp(float); uint __ovld __cnfn convert_uint_rtn(float); uint __ovld __cnfn convert_uint_sat_rtn(float); uint __ovld __cnfn convert_uint(float); uint __ovld __cnfn convert_uint_sat(float); long __ovld __cnfn convert_long_rte(char); long __ovld __cnfn convert_long_sat_rte(char); long __ovld __cnfn convert_long_rtz(char); long __ovld __cnfn convert_long_sat_rtz(char); long __ovld __cnfn convert_long_rtp(char); long __ovld __cnfn convert_long_sat_rtp(char); long __ovld __cnfn convert_long_rtn(char); long __ovld __cnfn convert_long_sat_rtn(char); long __ovld __cnfn convert_long(char); long __ovld __cnfn convert_long_sat(char); long __ovld __cnfn convert_long_rte(uchar); long __ovld __cnfn convert_long_sat_rte(uchar); long __ovld __cnfn convert_long_rtz(uchar); long __ovld __cnfn convert_long_sat_rtz(uchar); long __ovld __cnfn convert_long_rtp(uchar); long __ovld __cnfn convert_long_sat_rtp(uchar); long __ovld __cnfn convert_long_rtn(uchar); long __ovld __cnfn convert_long_sat_rtn(uchar); long __ovld __cnfn convert_long(uchar); long __ovld __cnfn convert_long_sat(uchar); long __ovld __cnfn convert_long_rte(short); long __ovld __cnfn convert_long_sat_rte(short); long __ovld __cnfn convert_long_rtz(short); long __ovld __cnfn convert_long_sat_rtz(short); long __ovld __cnfn convert_long_rtp(short); long __ovld __cnfn convert_long_sat_rtp(short); long __ovld __cnfn convert_long_rtn(short); long __ovld __cnfn convert_long_sat_rtn(short); long __ovld __cnfn convert_long(short); long __ovld __cnfn convert_long_sat(short); long __ovld __cnfn convert_long_rte(ushort); long __ovld __cnfn convert_long_sat_rte(ushort); long __ovld __cnfn convert_long_rtz(ushort); long __ovld __cnfn convert_long_sat_rtz(ushort); long __ovld __cnfn convert_long_rtp(ushort); long __ovld 
__cnfn convert_long_sat_rtp(ushort); long __ovld __cnfn convert_long_rtn(ushort); long __ovld __cnfn convert_long_sat_rtn(ushort); long __ovld __cnfn convert_long(ushort); long __ovld __cnfn convert_long_sat(ushort); long __ovld __cnfn convert_long_rte(int); long __ovld __cnfn convert_long_sat_rte(int); long __ovld __cnfn convert_long_rtz(int); long __ovld __cnfn convert_long_sat_rtz(int); long __ovld __cnfn convert_long_rtp(int); long __ovld __cnfn convert_long_sat_rtp(int); long __ovld __cnfn convert_long_rtn(int); long __ovld __cnfn convert_long_sat_rtn(int); long __ovld __cnfn convert_long(int); long __ovld __cnfn convert_long_sat(int); long __ovld __cnfn convert_long_rte(uint); long __ovld __cnfn convert_long_sat_rte(uint); long __ovld __cnfn convert_long_rtz(uint); long __ovld __cnfn convert_long_sat_rtz(uint); long __ovld __cnfn convert_long_rtp(uint); long __ovld __cnfn convert_long_sat_rtp(uint); long __ovld __cnfn convert_long_rtn(uint); long __ovld __cnfn convert_long_sat_rtn(uint); long __ovld __cnfn convert_long(uint); long __ovld __cnfn convert_long_sat(uint); long __ovld __cnfn convert_long_rte(long); long __ovld __cnfn convert_long_sat_rte(long); long __ovld __cnfn convert_long_rtz(long); long __ovld __cnfn convert_long_sat_rtz(long); long __ovld __cnfn convert_long_rtp(long); long __ovld __cnfn convert_long_sat_rtp(long); long __ovld __cnfn convert_long_rtn(long); long __ovld __cnfn convert_long_sat_rtn(long); long __ovld __cnfn convert_long(long); long __ovld __cnfn convert_long_sat(long); long __ovld __cnfn convert_long_rte(ulong); long __ovld __cnfn convert_long_sat_rte(ulong); long __ovld __cnfn convert_long_rtz(ulong); long __ovld __cnfn convert_long_sat_rtz(ulong); long __ovld __cnfn convert_long_rtp(ulong); long __ovld __cnfn convert_long_sat_rtp(ulong); long __ovld __cnfn convert_long_rtn(ulong); long __ovld __cnfn convert_long_sat_rtn(ulong); long __ovld __cnfn convert_long(ulong); long __ovld __cnfn convert_long_sat(ulong); long __ovld 
__cnfn convert_long_rte(float); long __ovld __cnfn convert_long_sat_rte(float); long __ovld __cnfn convert_long_rtz(float); long __ovld __cnfn convert_long_sat_rtz(float); long __ovld __cnfn convert_long_rtp(float); long __ovld __cnfn convert_long_sat_rtp(float); long __ovld __cnfn convert_long_rtn(float); long __ovld __cnfn convert_long_sat_rtn(float); long __ovld __cnfn convert_long(float); long __ovld __cnfn convert_long_sat(float); ulong __ovld __cnfn convert_ulong_rte(char); ulong __ovld __cnfn convert_ulong_sat_rte(char); ulong __ovld __cnfn convert_ulong_rtz(char); ulong __ovld __cnfn convert_ulong_sat_rtz(char); ulong __ovld __cnfn convert_ulong_rtp(char); ulong __ovld __cnfn convert_ulong_sat_rtp(char); ulong __ovld __cnfn convert_ulong_rtn(char); ulong __ovld __cnfn convert_ulong_sat_rtn(char); ulong __ovld __cnfn convert_ulong(char); ulong __ovld __cnfn convert_ulong_sat(char); ulong __ovld __cnfn convert_ulong_rte(uchar); ulong __ovld __cnfn convert_ulong_sat_rte(uchar); ulong __ovld __cnfn convert_ulong_rtz(uchar); ulong __ovld __cnfn convert_ulong_sat_rtz(uchar); ulong __ovld __cnfn convert_ulong_rtp(uchar); ulong __ovld __cnfn convert_ulong_sat_rtp(uchar); ulong __ovld __cnfn convert_ulong_rtn(uchar); ulong __ovld __cnfn convert_ulong_sat_rtn(uchar); ulong __ovld __cnfn convert_ulong(uchar); ulong __ovld __cnfn convert_ulong_sat(uchar); ulong __ovld __cnfn convert_ulong_rte(short); ulong __ovld __cnfn convert_ulong_sat_rte(short); ulong __ovld __cnfn convert_ulong_rtz(short); ulong __ovld __cnfn convert_ulong_sat_rtz(short); ulong __ovld __cnfn convert_ulong_rtp(short); ulong __ovld __cnfn convert_ulong_sat_rtp(short); ulong __ovld __cnfn convert_ulong_rtn(short); ulong __ovld __cnfn convert_ulong_sat_rtn(short); ulong __ovld __cnfn convert_ulong(short); ulong __ovld __cnfn convert_ulong_sat(short); ulong __ovld __cnfn convert_ulong_rte(ushort); ulong __ovld __cnfn convert_ulong_sat_rte(ushort); ulong __ovld __cnfn convert_ulong_rtz(ushort); ulong 
__ovld __cnfn convert_ulong_sat_rtz(ushort); ulong __ovld __cnfn convert_ulong_rtp(ushort); ulong __ovld __cnfn convert_ulong_sat_rtp(ushort); ulong __ovld __cnfn convert_ulong_rtn(ushort); ulong __ovld __cnfn convert_ulong_sat_rtn(ushort); ulong __ovld __cnfn convert_ulong(ushort); ulong __ovld __cnfn convert_ulong_sat(ushort); ulong __ovld __cnfn convert_ulong_rte(int); ulong __ovld __cnfn convert_ulong_sat_rte(int); ulong __ovld __cnfn convert_ulong_rtz(int); ulong __ovld __cnfn convert_ulong_sat_rtz(int); ulong __ovld __cnfn convert_ulong_rtp(int); ulong __ovld __cnfn convert_ulong_sat_rtp(int); ulong __ovld __cnfn convert_ulong_rtn(int); ulong __ovld __cnfn convert_ulong_sat_rtn(int); ulong __ovld __cnfn convert_ulong(int); ulong __ovld __cnfn convert_ulong_sat(int); ulong __ovld __cnfn convert_ulong_rte(uint); ulong __ovld __cnfn convert_ulong_sat_rte(uint); ulong __ovld __cnfn convert_ulong_rtz(uint); ulong __ovld __cnfn convert_ulong_sat_rtz(uint); ulong __ovld __cnfn convert_ulong_rtp(uint); ulong __ovld __cnfn convert_ulong_sat_rtp(uint); ulong __ovld __cnfn convert_ulong_rtn(uint); ulong __ovld __cnfn convert_ulong_sat_rtn(uint); ulong __ovld __cnfn convert_ulong(uint); ulong __ovld __cnfn convert_ulong_sat(uint); ulong __ovld __cnfn convert_ulong_rte(long); ulong __ovld __cnfn convert_ulong_sat_rte(long); ulong __ovld __cnfn convert_ulong_rtz(long); ulong __ovld __cnfn convert_ulong_sat_rtz(long); ulong __ovld __cnfn convert_ulong_rtp(long); ulong __ovld __cnfn convert_ulong_sat_rtp(long); ulong __ovld __cnfn convert_ulong_rtn(long); ulong __ovld __cnfn convert_ulong_sat_rtn(long); ulong __ovld __cnfn convert_ulong(long); ulong __ovld __cnfn convert_ulong_sat(long); ulong __ovld __cnfn convert_ulong_rte(ulong); ulong __ovld __cnfn convert_ulong_sat_rte(ulong); ulong __ovld __cnfn convert_ulong_rtz(ulong); ulong __ovld __cnfn convert_ulong_sat_rtz(ulong); ulong __ovld __cnfn convert_ulong_rtp(ulong); ulong __ovld __cnfn convert_ulong_sat_rtp(ulong); 
ulong __ovld __cnfn convert_ulong_rtn(ulong); ulong __ovld __cnfn convert_ulong_sat_rtn(ulong); ulong __ovld __cnfn convert_ulong(ulong); ulong __ovld __cnfn convert_ulong_sat(ulong); ulong __ovld __cnfn convert_ulong_rte(float); ulong __ovld __cnfn convert_ulong_sat_rte(float); ulong __ovld __cnfn convert_ulong_rtz(float); ulong __ovld __cnfn convert_ulong_sat_rtz(float); ulong __ovld __cnfn convert_ulong_rtp(float); ulong __ovld __cnfn convert_ulong_sat_rtp(float); ulong __ovld __cnfn convert_ulong_rtn(float); ulong __ovld __cnfn convert_ulong_sat_rtn(float); ulong __ovld __cnfn convert_ulong(float); ulong __ovld __cnfn convert_ulong_sat(float); float __ovld __cnfn convert_float_rte(char); float __ovld __cnfn convert_float_rtz(char); float __ovld __cnfn convert_float_rtp(char); float __ovld __cnfn convert_float_rtn(char); float __ovld __cnfn convert_float(char); float __ovld __cnfn convert_float_rte(uchar); float __ovld __cnfn convert_float_rtz(uchar); float __ovld __cnfn convert_float_rtp(uchar); float __ovld __cnfn convert_float_rtn(uchar); float __ovld __cnfn convert_float(uchar); float __ovld __cnfn convert_float_rte(short); float __ovld __cnfn convert_float_rtz(short); float __ovld __cnfn convert_float_rtp(short); float __ovld __cnfn convert_float_rtn(short); float __ovld __cnfn convert_float(short); float __ovld __cnfn convert_float_rte(ushort); float __ovld __cnfn convert_float_rtz(ushort); float __ovld __cnfn convert_float_rtp(ushort); float __ovld __cnfn convert_float_rtn(ushort); float __ovld __cnfn convert_float(ushort); float __ovld __cnfn convert_float_rte(int); float __ovld __cnfn convert_float_rtz(int); float __ovld __cnfn convert_float_rtp(int); float __ovld __cnfn convert_float_rtn(int); float __ovld __cnfn convert_float(int); float __ovld __cnfn convert_float_rte(uint); float __ovld __cnfn convert_float_rtz(uint); float __ovld __cnfn convert_float_rtp(uint); float __ovld __cnfn convert_float_rtn(uint); float __ovld __cnfn convert_float(uint); 
float __ovld __cnfn convert_float_rte(long); float __ovld __cnfn convert_float_rtz(long); float __ovld __cnfn convert_float_rtp(long); float __ovld __cnfn convert_float_rtn(long); float __ovld __cnfn convert_float(long); float __ovld __cnfn convert_float_rte(ulong); float __ovld __cnfn convert_float_rtz(ulong); float __ovld __cnfn convert_float_rtp(ulong); float __ovld __cnfn convert_float_rtn(ulong); float __ovld __cnfn convert_float(ulong); float __ovld __cnfn convert_float_rte(float); float __ovld __cnfn convert_float_rtz(float); float __ovld __cnfn convert_float_rtp(float); float __ovld __cnfn convert_float_rtn(float); float __ovld __cnfn convert_float(float); char2 __ovld __cnfn convert_char2_rte(char2); char2 __ovld __cnfn convert_char2_sat_rte(char2); char2 __ovld __cnfn convert_char2_rtz(char2); char2 __ovld __cnfn convert_char2_sat_rtz(char2); char2 __ovld __cnfn convert_char2_rtp(char2); char2 __ovld __cnfn convert_char2_sat_rtp(char2); char2 __ovld __cnfn convert_char2_rtn(char2); char2 __ovld __cnfn convert_char2_sat_rtn(char2); char2 __ovld __cnfn convert_char2(char2); char2 __ovld __cnfn convert_char2_sat(char2); char2 __ovld __cnfn convert_char2_rte(uchar2); char2 __ovld __cnfn convert_char2_sat_rte(uchar2); char2 __ovld __cnfn convert_char2_rtz(uchar2); char2 __ovld __cnfn convert_char2_sat_rtz(uchar2); char2 __ovld __cnfn convert_char2_rtp(uchar2); char2 __ovld __cnfn convert_char2_sat_rtp(uchar2); char2 __ovld __cnfn convert_char2_rtn(uchar2); char2 __ovld __cnfn convert_char2_sat_rtn(uchar2); char2 __ovld __cnfn convert_char2(uchar2); char2 __ovld __cnfn convert_char2_sat(uchar2); char2 __ovld __cnfn convert_char2_rte(short2); char2 __ovld __cnfn convert_char2_sat_rte(short2); char2 __ovld __cnfn convert_char2_rtz(short2); char2 __ovld __cnfn convert_char2_sat_rtz(short2); char2 __ovld __cnfn convert_char2_rtp(short2); char2 __ovld __cnfn convert_char2_sat_rtp(short2); char2 __ovld __cnfn convert_char2_rtn(short2); char2 __ovld __cnfn 
convert_char2_sat_rtn(short2); char2 __ovld __cnfn convert_char2(short2); char2 __ovld __cnfn convert_char2_sat(short2); char2 __ovld __cnfn convert_char2_rte(ushort2); char2 __ovld __cnfn convert_char2_sat_rte(ushort2); char2 __ovld __cnfn convert_char2_rtz(ushort2); char2 __ovld __cnfn convert_char2_sat_rtz(ushort2); char2 __ovld __cnfn convert_char2_rtp(ushort2); char2 __ovld __cnfn convert_char2_sat_rtp(ushort2); char2 __ovld __cnfn convert_char2_rtn(ushort2); char2 __ovld __cnfn convert_char2_sat_rtn(ushort2); char2 __ovld __cnfn convert_char2(ushort2); char2 __ovld __cnfn convert_char2_sat(ushort2); char2 __ovld __cnfn convert_char2_rte(int2); char2 __ovld __cnfn convert_char2_sat_rte(int2); char2 __ovld __cnfn convert_char2_rtz(int2); char2 __ovld __cnfn convert_char2_sat_rtz(int2); char2 __ovld __cnfn convert_char2_rtp(int2); char2 __ovld __cnfn convert_char2_sat_rtp(int2); char2 __ovld __cnfn convert_char2_rtn(int2); char2 __ovld __cnfn convert_char2_sat_rtn(int2); char2 __ovld __cnfn convert_char2(int2); char2 __ovld __cnfn convert_char2_sat(int2); char2 __ovld __cnfn convert_char2_rte(uint2); char2 __ovld __cnfn convert_char2_sat_rte(uint2); char2 __ovld __cnfn convert_char2_rtz(uint2); char2 __ovld __cnfn convert_char2_sat_rtz(uint2); char2 __ovld __cnfn convert_char2_rtp(uint2); char2 __ovld __cnfn convert_char2_sat_rtp(uint2); char2 __ovld __cnfn convert_char2_rtn(uint2); char2 __ovld __cnfn convert_char2_sat_rtn(uint2); char2 __ovld __cnfn convert_char2(uint2); char2 __ovld __cnfn convert_char2_sat(uint2); char2 __ovld __cnfn convert_char2_rte(long2); char2 __ovld __cnfn convert_char2_sat_rte(long2); char2 __ovld __cnfn convert_char2_rtz(long2); char2 __ovld __cnfn convert_char2_sat_rtz(long2); char2 __ovld __cnfn convert_char2_rtp(long2); char2 __ovld __cnfn convert_char2_sat_rtp(long2); char2 __ovld __cnfn convert_char2_rtn(long2); char2 __ovld __cnfn convert_char2_sat_rtn(long2); char2 __ovld __cnfn convert_char2(long2); char2 __ovld __cnfn 
convert_char2_sat(long2); char2 __ovld __cnfn convert_char2_rte(ulong2); char2 __ovld __cnfn convert_char2_sat_rte(ulong2); char2 __ovld __cnfn convert_char2_rtz(ulong2); char2 __ovld __cnfn convert_char2_sat_rtz(ulong2); char2 __ovld __cnfn convert_char2_rtp(ulong2); char2 __ovld __cnfn convert_char2_sat_rtp(ulong2); char2 __ovld __cnfn convert_char2_rtn(ulong2); char2 __ovld __cnfn convert_char2_sat_rtn(ulong2); char2 __ovld __cnfn convert_char2(ulong2); char2 __ovld __cnfn convert_char2_sat(ulong2); char2 __ovld __cnfn convert_char2_rte(float2); char2 __ovld __cnfn convert_char2_sat_rte(float2); char2 __ovld __cnfn convert_char2_rtz(float2); char2 __ovld __cnfn convert_char2_sat_rtz(float2); char2 __ovld __cnfn convert_char2_rtp(float2); char2 __ovld __cnfn convert_char2_sat_rtp(float2); char2 __ovld __cnfn convert_char2_rtn(float2); char2 __ovld __cnfn convert_char2_sat_rtn(float2); char2 __ovld __cnfn convert_char2(float2); char2 __ovld __cnfn convert_char2_sat(float2); uchar2 __ovld __cnfn convert_uchar2_rte(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2); uchar2 __ovld __cnfn convert_uchar2_rtz(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2); uchar2 __ovld __cnfn convert_uchar2_rtp(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2); uchar2 __ovld __cnfn convert_uchar2_rtn(char2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2); uchar2 __ovld __cnfn convert_uchar2(char2); uchar2 __ovld __cnfn convert_uchar2_sat(char2); uchar2 __ovld __cnfn convert_uchar2_rte(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2); uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2); uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2); uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2); uchar2 __ovld __cnfn convert_uchar2(uchar2); uchar2 __ovld __cnfn convert_uchar2_sat(uchar2); uchar2 __ovld 
__cnfn convert_uchar2_rte(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2); uchar2 __ovld __cnfn convert_uchar2_rtz(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2); uchar2 __ovld __cnfn convert_uchar2_rtp(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2); uchar2 __ovld __cnfn convert_uchar2_rtn(short2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2); uchar2 __ovld __cnfn convert_uchar2(short2); uchar2 __ovld __cnfn convert_uchar2_sat(short2); uchar2 __ovld __cnfn convert_uchar2_rte(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2); uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2); uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2); uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2); uchar2 __ovld __cnfn convert_uchar2(ushort2); uchar2 __ovld __cnfn convert_uchar2_sat(ushort2); uchar2 __ovld __cnfn convert_uchar2_rte(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2); uchar2 __ovld __cnfn convert_uchar2_rtz(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2); uchar2 __ovld __cnfn convert_uchar2_rtp(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2); uchar2 __ovld __cnfn convert_uchar2_rtn(int2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2); uchar2 __ovld __cnfn convert_uchar2(int2); uchar2 __ovld __cnfn convert_uchar2_sat(int2); uchar2 __ovld __cnfn convert_uchar2_rte(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2); uchar2 __ovld __cnfn convert_uchar2_rtz(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2); uchar2 __ovld __cnfn convert_uchar2_rtp(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2); uchar2 __ovld __cnfn convert_uchar2_rtn(uint2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2); uchar2 __ovld __cnfn convert_uchar2(uint2); uchar2 __ovld __cnfn convert_uchar2_sat(uint2); uchar2 __ovld __cnfn 
convert_uchar2_rte(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2); uchar2 __ovld __cnfn convert_uchar2_rtz(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2); uchar2 __ovld __cnfn convert_uchar2_rtp(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2); uchar2 __ovld __cnfn convert_uchar2_rtn(long2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2); uchar2 __ovld __cnfn convert_uchar2(long2); uchar2 __ovld __cnfn convert_uchar2_sat(long2); uchar2 __ovld __cnfn convert_uchar2_rte(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2); uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2); uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2); uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2); uchar2 __ovld __cnfn convert_uchar2(ulong2); uchar2 __ovld __cnfn convert_uchar2_sat(ulong2); uchar2 __ovld __cnfn convert_uchar2_rte(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2); uchar2 __ovld __cnfn convert_uchar2_rtz(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2); uchar2 __ovld __cnfn convert_uchar2_rtp(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2); uchar2 __ovld __cnfn convert_uchar2_rtn(float2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2); uchar2 __ovld __cnfn convert_uchar2(float2); uchar2 __ovld __cnfn convert_uchar2_sat(float2); short2 __ovld __cnfn convert_short2_rte(char2); short2 __ovld __cnfn convert_short2_sat_rte(char2); short2 __ovld __cnfn convert_short2_rtz(char2); short2 __ovld __cnfn convert_short2_sat_rtz(char2); short2 __ovld __cnfn convert_short2_rtp(char2); short2 __ovld __cnfn convert_short2_sat_rtp(char2); short2 __ovld __cnfn convert_short2_rtn(char2); short2 __ovld __cnfn convert_short2_sat_rtn(char2); short2 __ovld __cnfn convert_short2(char2); short2 __ovld __cnfn convert_short2_sat(char2); short2 __ovld __cnfn 
convert_short2_rte(uchar2); short2 __ovld __cnfn convert_short2_sat_rte(uchar2); short2 __ovld __cnfn convert_short2_rtz(uchar2); short2 __ovld __cnfn convert_short2_sat_rtz(uchar2); short2 __ovld __cnfn convert_short2_rtp(uchar2); short2 __ovld __cnfn convert_short2_sat_rtp(uchar2); short2 __ovld __cnfn convert_short2_rtn(uchar2); short2 __ovld __cnfn convert_short2_sat_rtn(uchar2); short2 __ovld __cnfn convert_short2(uchar2); short2 __ovld __cnfn convert_short2_sat(uchar2); short2 __ovld __cnfn convert_short2_rte(short2); short2 __ovld __cnfn convert_short2_sat_rte(short2); short2 __ovld __cnfn convert_short2_rtz(short2); short2 __ovld __cnfn convert_short2_sat_rtz(short2); short2 __ovld __cnfn convert_short2_rtp(short2); short2 __ovld __cnfn convert_short2_sat_rtp(short2); short2 __ovld __cnfn convert_short2_rtn(short2); short2 __ovld __cnfn convert_short2_sat_rtn(short2); short2 __ovld __cnfn convert_short2(short2); short2 __ovld __cnfn convert_short2_sat(short2); short2 __ovld __cnfn convert_short2_rte(ushort2); short2 __ovld __cnfn convert_short2_sat_rte(ushort2); short2 __ovld __cnfn convert_short2_rtz(ushort2); short2 __ovld __cnfn convert_short2_sat_rtz(ushort2); short2 __ovld __cnfn convert_short2_rtp(ushort2); short2 __ovld __cnfn convert_short2_sat_rtp(ushort2); short2 __ovld __cnfn convert_short2_rtn(ushort2); short2 __ovld __cnfn convert_short2_sat_rtn(ushort2); short2 __ovld __cnfn convert_short2(ushort2); short2 __ovld __cnfn convert_short2_sat(ushort2); short2 __ovld __cnfn convert_short2_rte(int2); short2 __ovld __cnfn convert_short2_sat_rte(int2); short2 __ovld __cnfn convert_short2_rtz(int2); short2 __ovld __cnfn convert_short2_sat_rtz(int2); short2 __ovld __cnfn convert_short2_rtp(int2); short2 __ovld __cnfn convert_short2_sat_rtp(int2); short2 __ovld __cnfn convert_short2_rtn(int2); short2 __ovld __cnfn convert_short2_sat_rtn(int2); short2 __ovld __cnfn convert_short2(int2); short2 __ovld __cnfn convert_short2_sat(int2); short2 __ovld __cnfn 
convert_short2_rte(uint2); short2 __ovld __cnfn convert_short2_sat_rte(uint2); short2 __ovld __cnfn convert_short2_rtz(uint2); short2 __ovld __cnfn convert_short2_sat_rtz(uint2); short2 __ovld __cnfn convert_short2_rtp(uint2); short2 __ovld __cnfn convert_short2_sat_rtp(uint2); short2 __ovld __cnfn convert_short2_rtn(uint2); short2 __ovld __cnfn convert_short2_sat_rtn(uint2); short2 __ovld __cnfn convert_short2(uint2); short2 __ovld __cnfn convert_short2_sat(uint2); short2 __ovld __cnfn convert_short2_rte(long2); short2 __ovld __cnfn convert_short2_sat_rte(long2); short2 __ovld __cnfn convert_short2_rtz(long2); short2 __ovld __cnfn convert_short2_sat_rtz(long2); short2 __ovld __cnfn convert_short2_rtp(long2); short2 __ovld __cnfn convert_short2_sat_rtp(long2); short2 __ovld __cnfn convert_short2_rtn(long2); short2 __ovld __cnfn convert_short2_sat_rtn(long2); short2 __ovld __cnfn convert_short2(long2); short2 __ovld __cnfn convert_short2_sat(long2); short2 __ovld __cnfn convert_short2_rte(ulong2); short2 __ovld __cnfn convert_short2_sat_rte(ulong2); short2 __ovld __cnfn convert_short2_rtz(ulong2); short2 __ovld __cnfn convert_short2_sat_rtz(ulong2); short2 __ovld __cnfn convert_short2_rtp(ulong2); short2 __ovld __cnfn convert_short2_sat_rtp(ulong2); short2 __ovld __cnfn convert_short2_rtn(ulong2); short2 __ovld __cnfn convert_short2_sat_rtn(ulong2); short2 __ovld __cnfn convert_short2(ulong2); short2 __ovld __cnfn convert_short2_sat(ulong2); short2 __ovld __cnfn convert_short2_rte(float2); short2 __ovld __cnfn convert_short2_sat_rte(float2); short2 __ovld __cnfn convert_short2_rtz(float2); short2 __ovld __cnfn convert_short2_sat_rtz(float2); short2 __ovld __cnfn convert_short2_rtp(float2); short2 __ovld __cnfn convert_short2_sat_rtp(float2); short2 __ovld __cnfn convert_short2_rtn(float2); short2 __ovld __cnfn convert_short2_sat_rtn(float2); short2 __ovld __cnfn convert_short2(float2); short2 __ovld __cnfn convert_short2_sat(float2); ushort2 __ovld __cnfn 
convert_ushort2_rte(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2); ushort2 __ovld __cnfn convert_ushort2_rtz(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2); ushort2 __ovld __cnfn convert_ushort2_rtp(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2); ushort2 __ovld __cnfn convert_ushort2_rtn(char2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2); ushort2 __ovld __cnfn convert_ushort2(char2); ushort2 __ovld __cnfn convert_ushort2_sat(char2); ushort2 __ovld __cnfn convert_ushort2_rte(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2); ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2); ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2); ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2); ushort2 __ovld __cnfn convert_ushort2(uchar2); ushort2 __ovld __cnfn convert_ushort2_sat(uchar2); ushort2 __ovld __cnfn convert_ushort2_rte(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2); ushort2 __ovld __cnfn convert_ushort2_rtz(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2); ushort2 __ovld __cnfn convert_ushort2_rtp(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2); ushort2 __ovld __cnfn convert_ushort2_rtn(short2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2); ushort2 __ovld __cnfn convert_ushort2(short2); ushort2 __ovld __cnfn convert_ushort2_sat(short2); ushort2 __ovld __cnfn convert_ushort2_rte(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2); ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2); ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2); ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2); ushort2 __ovld __cnfn 
convert_ushort2(ushort2); ushort2 __ovld __cnfn convert_ushort2_sat(ushort2); ushort2 __ovld __cnfn convert_ushort2_rte(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2); ushort2 __ovld __cnfn convert_ushort2_rtz(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2); ushort2 __ovld __cnfn convert_ushort2_rtp(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2); ushort2 __ovld __cnfn convert_ushort2_rtn(int2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2); ushort2 __ovld __cnfn convert_ushort2(int2); ushort2 __ovld __cnfn convert_ushort2_sat(int2); ushort2 __ovld __cnfn convert_ushort2_rte(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2); ushort2 __ovld __cnfn convert_ushort2_rtz(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2); ushort2 __ovld __cnfn convert_ushort2_rtp(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2); ushort2 __ovld __cnfn convert_ushort2_rtn(uint2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2); ushort2 __ovld __cnfn convert_ushort2(uint2); ushort2 __ovld __cnfn convert_ushort2_sat(uint2); ushort2 __ovld __cnfn convert_ushort2_rte(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2); ushort2 __ovld __cnfn convert_ushort2_rtz(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2); ushort2 __ovld __cnfn convert_ushort2_rtp(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2); ushort2 __ovld __cnfn convert_ushort2_rtn(long2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2); ushort2 __ovld __cnfn convert_ushort2(long2); ushort2 __ovld __cnfn convert_ushort2_sat(long2); ushort2 __ovld __cnfn convert_ushort2_rte(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2); ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2); ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2); ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2); ushort2 __ovld __cnfn 
convert_ushort2_sat_rtn(ulong2); ushort2 __ovld __cnfn convert_ushort2(ulong2); ushort2 __ovld __cnfn convert_ushort2_sat(ulong2); ushort2 __ovld __cnfn convert_ushort2_rte(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2); ushort2 __ovld __cnfn convert_ushort2_rtz(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2); ushort2 __ovld __cnfn convert_ushort2_rtp(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2); ushort2 __ovld __cnfn convert_ushort2_rtn(float2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2); ushort2 __ovld __cnfn convert_ushort2(float2); ushort2 __ovld __cnfn convert_ushort2_sat(float2); int2 __ovld __cnfn convert_int2_rte(char2); int2 __ovld __cnfn convert_int2_sat_rte(char2); int2 __ovld __cnfn convert_int2_rtz(char2); int2 __ovld __cnfn convert_int2_sat_rtz(char2); int2 __ovld __cnfn convert_int2_rtp(char2); int2 __ovld __cnfn convert_int2_sat_rtp(char2); int2 __ovld __cnfn convert_int2_rtn(char2); int2 __ovld __cnfn convert_int2_sat_rtn(char2); int2 __ovld __cnfn convert_int2(char2); int2 __ovld __cnfn convert_int2_sat(char2); int2 __ovld __cnfn convert_int2_rte(uchar2); int2 __ovld __cnfn convert_int2_sat_rte(uchar2); int2 __ovld __cnfn convert_int2_rtz(uchar2); int2 __ovld __cnfn convert_int2_sat_rtz(uchar2); int2 __ovld __cnfn convert_int2_rtp(uchar2); int2 __ovld __cnfn convert_int2_sat_rtp(uchar2); int2 __ovld __cnfn convert_int2_rtn(uchar2); int2 __ovld __cnfn convert_int2_sat_rtn(uchar2); int2 __ovld __cnfn convert_int2(uchar2); int2 __ovld __cnfn convert_int2_sat(uchar2); int2 __ovld __cnfn convert_int2_rte(short2); int2 __ovld __cnfn convert_int2_sat_rte(short2); int2 __ovld __cnfn convert_int2_rtz(short2); int2 __ovld __cnfn convert_int2_sat_rtz(short2); int2 __ovld __cnfn convert_int2_rtp(short2); int2 __ovld __cnfn convert_int2_sat_rtp(short2); int2 __ovld __cnfn convert_int2_rtn(short2); int2 __ovld __cnfn convert_int2_sat_rtn(short2); int2 __ovld __cnfn convert_int2(short2); int2 __ovld 
__cnfn convert_int2_sat(short2); int2 __ovld __cnfn convert_int2_rte(ushort2); int2 __ovld __cnfn convert_int2_sat_rte(ushort2); int2 __ovld __cnfn convert_int2_rtz(ushort2); int2 __ovld __cnfn convert_int2_sat_rtz(ushort2); int2 __ovld __cnfn convert_int2_rtp(ushort2); int2 __ovld __cnfn convert_int2_sat_rtp(ushort2); int2 __ovld __cnfn convert_int2_rtn(ushort2); int2 __ovld __cnfn convert_int2_sat_rtn(ushort2); int2 __ovld __cnfn convert_int2(ushort2); int2 __ovld __cnfn convert_int2_sat(ushort2); int2 __ovld __cnfn convert_int2_rte(int2); int2 __ovld __cnfn convert_int2_sat_rte(int2); int2 __ovld __cnfn convert_int2_rtz(int2); int2 __ovld __cnfn convert_int2_sat_rtz(int2); int2 __ovld __cnfn convert_int2_rtp(int2); int2 __ovld __cnfn convert_int2_sat_rtp(int2); int2 __ovld __cnfn convert_int2_rtn(int2); int2 __ovld __cnfn convert_int2_sat_rtn(int2); int2 __ovld __cnfn convert_int2(int2); int2 __ovld __cnfn convert_int2_sat(int2); int2 __ovld __cnfn convert_int2_rte(uint2); int2 __ovld __cnfn convert_int2_sat_rte(uint2); int2 __ovld __cnfn convert_int2_rtz(uint2); int2 __ovld __cnfn convert_int2_sat_rtz(uint2); int2 __ovld __cnfn convert_int2_rtp(uint2); int2 __ovld __cnfn convert_int2_sat_rtp(uint2); int2 __ovld __cnfn convert_int2_rtn(uint2); int2 __ovld __cnfn convert_int2_sat_rtn(uint2); int2 __ovld __cnfn convert_int2(uint2); int2 __ovld __cnfn convert_int2_sat(uint2); int2 __ovld __cnfn convert_int2_rte(long2); int2 __ovld __cnfn convert_int2_sat_rte(long2); int2 __ovld __cnfn convert_int2_rtz(long2); int2 __ovld __cnfn convert_int2_sat_rtz(long2); int2 __ovld __cnfn convert_int2_rtp(long2); int2 __ovld __cnfn convert_int2_sat_rtp(long2); int2 __ovld __cnfn convert_int2_rtn(long2); int2 __ovld __cnfn convert_int2_sat_rtn(long2); int2 __ovld __cnfn convert_int2(long2); int2 __ovld __cnfn convert_int2_sat(long2); int2 __ovld __cnfn convert_int2_rte(ulong2); int2 __ovld __cnfn convert_int2_sat_rte(ulong2); int2 __ovld __cnfn convert_int2_rtz(ulong2); int2 
__ovld __cnfn convert_int2_sat_rtz(ulong2); int2 __ovld __cnfn convert_int2_rtp(ulong2); int2 __ovld __cnfn convert_int2_sat_rtp(ulong2); int2 __ovld __cnfn convert_int2_rtn(ulong2); int2 __ovld __cnfn convert_int2_sat_rtn(ulong2); int2 __ovld __cnfn convert_int2(ulong2); int2 __ovld __cnfn convert_int2_sat(ulong2); int2 __ovld __cnfn convert_int2_rte(float2); int2 __ovld __cnfn convert_int2_sat_rte(float2); int2 __ovld __cnfn convert_int2_rtz(float2); int2 __ovld __cnfn convert_int2_sat_rtz(float2); int2 __ovld __cnfn convert_int2_rtp(float2); int2 __ovld __cnfn convert_int2_sat_rtp(float2); int2 __ovld __cnfn convert_int2_rtn(float2); int2 __ovld __cnfn convert_int2_sat_rtn(float2); int2 __ovld __cnfn convert_int2(float2); int2 __ovld __cnfn convert_int2_sat(float2); uint2 __ovld __cnfn convert_uint2_rte(char2); uint2 __ovld __cnfn convert_uint2_sat_rte(char2); uint2 __ovld __cnfn convert_uint2_rtz(char2); uint2 __ovld __cnfn convert_uint2_sat_rtz(char2); uint2 __ovld __cnfn convert_uint2_rtp(char2); uint2 __ovld __cnfn convert_uint2_sat_rtp(char2); uint2 __ovld __cnfn convert_uint2_rtn(char2); uint2 __ovld __cnfn convert_uint2_sat_rtn(char2); uint2 __ovld __cnfn convert_uint2(char2); uint2 __ovld __cnfn convert_uint2_sat(char2); uint2 __ovld __cnfn convert_uint2_rte(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2); uint2 __ovld __cnfn convert_uint2_rtz(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2); uint2 __ovld __cnfn convert_uint2_rtp(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2); uint2 __ovld __cnfn convert_uint2_rtn(uchar2); uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2); uint2 __ovld __cnfn convert_uint2(uchar2); uint2 __ovld __cnfn convert_uint2_sat(uchar2); uint2 __ovld __cnfn convert_uint2_rte(short2); uint2 __ovld __cnfn convert_uint2_sat_rte(short2); uint2 __ovld __cnfn convert_uint2_rtz(short2); uint2 __ovld __cnfn convert_uint2_sat_rtz(short2); uint2 __ovld __cnfn convert_uint2_rtp(short2); uint2 __ovld __cnfn 
convert_uint2_sat_rtp(short2); uint2 __ovld __cnfn convert_uint2_rtn(short2); uint2 __ovld __cnfn convert_uint2_sat_rtn(short2); uint2 __ovld __cnfn convert_uint2(short2); uint2 __ovld __cnfn convert_uint2_sat(short2); uint2 __ovld __cnfn convert_uint2_rte(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2); uint2 __ovld __cnfn convert_uint2_rtz(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2); uint2 __ovld __cnfn convert_uint2_rtp(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2); uint2 __ovld __cnfn convert_uint2_rtn(ushort2); uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2); uint2 __ovld __cnfn convert_uint2(ushort2); uint2 __ovld __cnfn convert_uint2_sat(ushort2); uint2 __ovld __cnfn convert_uint2_rte(int2); uint2 __ovld __cnfn convert_uint2_sat_rte(int2); uint2 __ovld __cnfn convert_uint2_rtz(int2); uint2 __ovld __cnfn convert_uint2_sat_rtz(int2); uint2 __ovld __cnfn convert_uint2_rtp(int2); uint2 __ovld __cnfn convert_uint2_sat_rtp(int2); uint2 __ovld __cnfn convert_uint2_rtn(int2); uint2 __ovld __cnfn convert_uint2_sat_rtn(int2); uint2 __ovld __cnfn convert_uint2(int2); uint2 __ovld __cnfn convert_uint2_sat(int2); uint2 __ovld __cnfn convert_uint2_rte(uint2); uint2 __ovld __cnfn convert_uint2_sat_rte(uint2); uint2 __ovld __cnfn convert_uint2_rtz(uint2); uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2); uint2 __ovld __cnfn convert_uint2_rtp(uint2); uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2); uint2 __ovld __cnfn convert_uint2_rtn(uint2); uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2); uint2 __ovld __cnfn convert_uint2(uint2); uint2 __ovld __cnfn convert_uint2_sat(uint2); uint2 __ovld __cnfn convert_uint2_rte(long2); uint2 __ovld __cnfn convert_uint2_sat_rte(long2); uint2 __ovld __cnfn convert_uint2_rtz(long2); uint2 __ovld __cnfn convert_uint2_sat_rtz(long2); uint2 __ovld __cnfn convert_uint2_rtp(long2); uint2 __ovld __cnfn convert_uint2_sat_rtp(long2); uint2 __ovld __cnfn convert_uint2_rtn(long2); uint2 __ovld __cnfn 
convert_uint2_sat_rtn(long2); uint2 __ovld __cnfn convert_uint2(long2); uint2 __ovld __cnfn convert_uint2_sat(long2); uint2 __ovld __cnfn convert_uint2_rte(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2); uint2 __ovld __cnfn convert_uint2_rtz(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2); uint2 __ovld __cnfn convert_uint2_rtp(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2); uint2 __ovld __cnfn convert_uint2_rtn(ulong2); uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2); uint2 __ovld __cnfn convert_uint2(ulong2); uint2 __ovld __cnfn convert_uint2_sat(ulong2); uint2 __ovld __cnfn convert_uint2_rte(float2); uint2 __ovld __cnfn convert_uint2_sat_rte(float2); uint2 __ovld __cnfn convert_uint2_rtz(float2); uint2 __ovld __cnfn convert_uint2_sat_rtz(float2); uint2 __ovld __cnfn convert_uint2_rtp(float2); uint2 __ovld __cnfn convert_uint2_sat_rtp(float2); uint2 __ovld __cnfn convert_uint2_rtn(float2); uint2 __ovld __cnfn convert_uint2_sat_rtn(float2); uint2 __ovld __cnfn convert_uint2(float2); uint2 __ovld __cnfn convert_uint2_sat(float2); long2 __ovld __cnfn convert_long2_rte(char2); long2 __ovld __cnfn convert_long2_sat_rte(char2); long2 __ovld __cnfn convert_long2_rtz(char2); long2 __ovld __cnfn convert_long2_sat_rtz(char2); long2 __ovld __cnfn convert_long2_rtp(char2); long2 __ovld __cnfn convert_long2_sat_rtp(char2); long2 __ovld __cnfn convert_long2_rtn(char2); long2 __ovld __cnfn convert_long2_sat_rtn(char2); long2 __ovld __cnfn convert_long2(char2); long2 __ovld __cnfn convert_long2_sat(char2); long2 __ovld __cnfn convert_long2_rte(uchar2); long2 __ovld __cnfn convert_long2_sat_rte(uchar2); long2 __ovld __cnfn convert_long2_rtz(uchar2); long2 __ovld __cnfn convert_long2_sat_rtz(uchar2); long2 __ovld __cnfn convert_long2_rtp(uchar2); long2 __ovld __cnfn convert_long2_sat_rtp(uchar2); long2 __ovld __cnfn convert_long2_rtn(uchar2); long2 __ovld __cnfn convert_long2_sat_rtn(uchar2); long2 __ovld __cnfn convert_long2(uchar2); long2 
__ovld __cnfn convert_long2_sat(uchar2); long2 __ovld __cnfn convert_long2_rte(short2); long2 __ovld __cnfn convert_long2_sat_rte(short2); long2 __ovld __cnfn convert_long2_rtz(short2); long2 __ovld __cnfn convert_long2_sat_rtz(short2); long2 __ovld __cnfn convert_long2_rtp(short2); long2 __ovld __cnfn convert_long2_sat_rtp(short2); long2 __ovld __cnfn convert_long2_rtn(short2); long2 __ovld __cnfn convert_long2_sat_rtn(short2); long2 __ovld __cnfn convert_long2(short2); long2 __ovld __cnfn convert_long2_sat(short2); long2 __ovld __cnfn convert_long2_rte(ushort2); long2 __ovld __cnfn convert_long2_sat_rte(ushort2); long2 __ovld __cnfn convert_long2_rtz(ushort2); long2 __ovld __cnfn convert_long2_sat_rtz(ushort2); long2 __ovld __cnfn convert_long2_rtp(ushort2); long2 __ovld __cnfn convert_long2_sat_rtp(ushort2); long2 __ovld __cnfn convert_long2_rtn(ushort2); long2 __ovld __cnfn convert_long2_sat_rtn(ushort2); long2 __ovld __cnfn convert_long2(ushort2); long2 __ovld __cnfn convert_long2_sat(ushort2); long2 __ovld __cnfn convert_long2_rte(int2); long2 __ovld __cnfn convert_long2_sat_rte(int2); long2 __ovld __cnfn convert_long2_rtz(int2); long2 __ovld __cnfn convert_long2_sat_rtz(int2); long2 __ovld __cnfn convert_long2_rtp(int2); long2 __ovld __cnfn convert_long2_sat_rtp(int2); long2 __ovld __cnfn convert_long2_rtn(int2); long2 __ovld __cnfn convert_long2_sat_rtn(int2); long2 __ovld __cnfn convert_long2(int2); long2 __ovld __cnfn convert_long2_sat(int2); long2 __ovld __cnfn convert_long2_rte(uint2); long2 __ovld __cnfn convert_long2_sat_rte(uint2); long2 __ovld __cnfn convert_long2_rtz(uint2); long2 __ovld __cnfn convert_long2_sat_rtz(uint2); long2 __ovld __cnfn convert_long2_rtp(uint2); long2 __ovld __cnfn convert_long2_sat_rtp(uint2); long2 __ovld __cnfn convert_long2_rtn(uint2); long2 __ovld __cnfn convert_long2_sat_rtn(uint2); long2 __ovld __cnfn convert_long2(uint2); long2 __ovld __cnfn convert_long2_sat(uint2); long2 __ovld __cnfn convert_long2_rte(long2); 
long2 __ovld __cnfn convert_long2_sat_rte(long2); long2 __ovld __cnfn convert_long2_rtz(long2); long2 __ovld __cnfn convert_long2_sat_rtz(long2); long2 __ovld __cnfn convert_long2_rtp(long2); long2 __ovld __cnfn convert_long2_sat_rtp(long2); long2 __ovld __cnfn convert_long2_rtn(long2); long2 __ovld __cnfn convert_long2_sat_rtn(long2); long2 __ovld __cnfn convert_long2(long2); long2 __ovld __cnfn convert_long2_sat(long2); long2 __ovld __cnfn convert_long2_rte(ulong2); long2 __ovld __cnfn convert_long2_sat_rte(ulong2); long2 __ovld __cnfn convert_long2_rtz(ulong2); long2 __ovld __cnfn convert_long2_sat_rtz(ulong2); long2 __ovld __cnfn convert_long2_rtp(ulong2); long2 __ovld __cnfn convert_long2_sat_rtp(ulong2); long2 __ovld __cnfn convert_long2_rtn(ulong2); long2 __ovld __cnfn convert_long2_sat_rtn(ulong2); long2 __ovld __cnfn convert_long2(ulong2); long2 __ovld __cnfn convert_long2_sat(ulong2); long2 __ovld __cnfn convert_long2_rte(float2); long2 __ovld __cnfn convert_long2_sat_rte(float2); long2 __ovld __cnfn convert_long2_rtz(float2); long2 __ovld __cnfn convert_long2_sat_rtz(float2); long2 __ovld __cnfn convert_long2_rtp(float2); long2 __ovld __cnfn convert_long2_sat_rtp(float2); long2 __ovld __cnfn convert_long2_rtn(float2); long2 __ovld __cnfn convert_long2_sat_rtn(float2); long2 __ovld __cnfn convert_long2(float2); long2 __ovld __cnfn convert_long2_sat(float2); ulong2 __ovld __cnfn convert_ulong2_rte(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2); ulong2 __ovld __cnfn convert_ulong2_rtz(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2); ulong2 __ovld __cnfn convert_ulong2_rtp(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2); ulong2 __ovld __cnfn convert_ulong2_rtn(char2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2); ulong2 __ovld __cnfn convert_ulong2(char2); ulong2 __ovld __cnfn convert_ulong2_sat(char2); ulong2 __ovld __cnfn convert_ulong2_rte(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2); ulong2 __ovld 
__cnfn convert_ulong2_rtz(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2); ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2); ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2); ulong2 __ovld __cnfn convert_ulong2(uchar2); ulong2 __ovld __cnfn convert_ulong2_sat(uchar2); ulong2 __ovld __cnfn convert_ulong2_rte(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2); ulong2 __ovld __cnfn convert_ulong2_rtz(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2); ulong2 __ovld __cnfn convert_ulong2_rtp(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2); ulong2 __ovld __cnfn convert_ulong2_rtn(short2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2); ulong2 __ovld __cnfn convert_ulong2(short2); ulong2 __ovld __cnfn convert_ulong2_sat(short2); ulong2 __ovld __cnfn convert_ulong2_rte(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2); ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2); ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2); ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2); ulong2 __ovld __cnfn convert_ulong2(ushort2); ulong2 __ovld __cnfn convert_ulong2_sat(ushort2); ulong2 __ovld __cnfn convert_ulong2_rte(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2); ulong2 __ovld __cnfn convert_ulong2_rtz(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2); ulong2 __ovld __cnfn convert_ulong2_rtp(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2); ulong2 __ovld __cnfn convert_ulong2_rtn(int2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2); ulong2 __ovld __cnfn convert_ulong2(int2); ulong2 __ovld __cnfn convert_ulong2_sat(int2); ulong2 __ovld __cnfn convert_ulong2_rte(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2); ulong2 __ovld 
__cnfn convert_ulong2_rtz(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2); ulong2 __ovld __cnfn convert_ulong2_rtp(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2); ulong2 __ovld __cnfn convert_ulong2_rtn(uint2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2); ulong2 __ovld __cnfn convert_ulong2(uint2); ulong2 __ovld __cnfn convert_ulong2_sat(uint2); ulong2 __ovld __cnfn convert_ulong2_rte(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2); ulong2 __ovld __cnfn convert_ulong2_rtz(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2); ulong2 __ovld __cnfn convert_ulong2_rtp(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2); ulong2 __ovld __cnfn convert_ulong2_rtn(long2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2); ulong2 __ovld __cnfn convert_ulong2(long2); ulong2 __ovld __cnfn convert_ulong2_sat(long2); ulong2 __ovld __cnfn convert_ulong2_rte(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2); ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2); ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2); ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2); ulong2 __ovld __cnfn convert_ulong2(ulong2); ulong2 __ovld __cnfn convert_ulong2_sat(ulong2); ulong2 __ovld __cnfn convert_ulong2_rte(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2); ulong2 __ovld __cnfn convert_ulong2_rtz(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2); ulong2 __ovld __cnfn convert_ulong2_rtp(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2); ulong2 __ovld __cnfn convert_ulong2_rtn(float2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2); ulong2 __ovld __cnfn convert_ulong2(float2); ulong2 __ovld __cnfn convert_ulong2_sat(float2); float2 __ovld __cnfn convert_float2_rte(char2); float2 __ovld __cnfn convert_float2_rtz(char2); float2 __ovld __cnfn 
convert_float2_rtp(char2); float2 __ovld __cnfn convert_float2_rtn(char2); float2 __ovld __cnfn convert_float2(char2); float2 __ovld __cnfn convert_float2_rte(uchar2); float2 __ovld __cnfn convert_float2_rtz(uchar2); float2 __ovld __cnfn convert_float2_rtp(uchar2); float2 __ovld __cnfn convert_float2_rtn(uchar2); float2 __ovld __cnfn convert_float2(uchar2); float2 __ovld __cnfn convert_float2_rte(short2); float2 __ovld __cnfn convert_float2_rtz(short2); float2 __ovld __cnfn convert_float2_rtp(short2); float2 __ovld __cnfn convert_float2_rtn(short2); float2 __ovld __cnfn convert_float2(short2); float2 __ovld __cnfn convert_float2_rte(ushort2); float2 __ovld __cnfn convert_float2_rtz(ushort2); float2 __ovld __cnfn convert_float2_rtp(ushort2); float2 __ovld __cnfn convert_float2_rtn(ushort2); float2 __ovld __cnfn convert_float2(ushort2); float2 __ovld __cnfn convert_float2_rte(int2); float2 __ovld __cnfn convert_float2_rtz(int2); float2 __ovld __cnfn convert_float2_rtp(int2); float2 __ovld __cnfn convert_float2_rtn(int2); float2 __ovld __cnfn convert_float2(int2); float2 __ovld __cnfn convert_float2_rte(uint2); float2 __ovld __cnfn convert_float2_rtz(uint2); float2 __ovld __cnfn convert_float2_rtp(uint2); float2 __ovld __cnfn convert_float2_rtn(uint2); float2 __ovld __cnfn convert_float2(uint2); float2 __ovld __cnfn convert_float2_rte(long2); float2 __ovld __cnfn convert_float2_rtz(long2); float2 __ovld __cnfn convert_float2_rtp(long2); float2 __ovld __cnfn convert_float2_rtn(long2); float2 __ovld __cnfn convert_float2(long2); float2 __ovld __cnfn convert_float2_rte(ulong2); float2 __ovld __cnfn convert_float2_rtz(ulong2); float2 __ovld __cnfn convert_float2_rtp(ulong2); float2 __ovld __cnfn convert_float2_rtn(ulong2); float2 __ovld __cnfn convert_float2(ulong2); float2 __ovld __cnfn convert_float2_rte(float2); float2 __ovld __cnfn convert_float2_rtz(float2); float2 __ovld __cnfn convert_float2_rtp(float2); float2 __ovld __cnfn convert_float2_rtn(float2); float2 
__ovld __cnfn convert_float2(float2); char3 __ovld __cnfn convert_char3_rte(char3); char3 __ovld __cnfn convert_char3_sat_rte(char3); char3 __ovld __cnfn convert_char3_rtz(char3); char3 __ovld __cnfn convert_char3_sat_rtz(char3); char3 __ovld __cnfn convert_char3_rtp(char3); char3 __ovld __cnfn convert_char3_sat_rtp(char3); char3 __ovld __cnfn convert_char3_rtn(char3); char3 __ovld __cnfn convert_char3_sat_rtn(char3); char3 __ovld __cnfn convert_char3(char3); char3 __ovld __cnfn convert_char3_sat(char3); char3 __ovld __cnfn convert_char3_rte(uchar3); char3 __ovld __cnfn convert_char3_sat_rte(uchar3); char3 __ovld __cnfn convert_char3_rtz(uchar3); char3 __ovld __cnfn convert_char3_sat_rtz(uchar3); char3 __ovld __cnfn convert_char3_rtp(uchar3); char3 __ovld __cnfn convert_char3_sat_rtp(uchar3); char3 __ovld __cnfn convert_char3_rtn(uchar3); char3 __ovld __cnfn convert_char3_sat_rtn(uchar3); char3 __ovld __cnfn convert_char3(uchar3); char3 __ovld __cnfn convert_char3_sat(uchar3); char3 __ovld __cnfn convert_char3_rte(short3); char3 __ovld __cnfn convert_char3_sat_rte(short3); char3 __ovld __cnfn convert_char3_rtz(short3); char3 __ovld __cnfn convert_char3_sat_rtz(short3); char3 __ovld __cnfn convert_char3_rtp(short3); char3 __ovld __cnfn convert_char3_sat_rtp(short3); char3 __ovld __cnfn convert_char3_rtn(short3); char3 __ovld __cnfn convert_char3_sat_rtn(short3); char3 __ovld __cnfn convert_char3(short3); char3 __ovld __cnfn convert_char3_sat(short3); char3 __ovld __cnfn convert_char3_rte(ushort3); char3 __ovld __cnfn convert_char3_sat_rte(ushort3); char3 __ovld __cnfn convert_char3_rtz(ushort3); char3 __ovld __cnfn convert_char3_sat_rtz(ushort3); char3 __ovld __cnfn convert_char3_rtp(ushort3); char3 __ovld __cnfn convert_char3_sat_rtp(ushort3); char3 __ovld __cnfn convert_char3_rtn(ushort3); char3 __ovld __cnfn convert_char3_sat_rtn(ushort3); char3 __ovld __cnfn convert_char3(ushort3); char3 __ovld __cnfn convert_char3_sat(ushort3); char3 __ovld __cnfn 
convert_char3_rte(int3); char3 __ovld __cnfn convert_char3_sat_rte(int3); char3 __ovld __cnfn convert_char3_rtz(int3); char3 __ovld __cnfn convert_char3_sat_rtz(int3); char3 __ovld __cnfn convert_char3_rtp(int3); char3 __ovld __cnfn convert_char3_sat_rtp(int3); char3 __ovld __cnfn convert_char3_rtn(int3); char3 __ovld __cnfn convert_char3_sat_rtn(int3); char3 __ovld __cnfn convert_char3(int3); char3 __ovld __cnfn convert_char3_sat(int3); char3 __ovld __cnfn convert_char3_rte(uint3); char3 __ovld __cnfn convert_char3_sat_rte(uint3); char3 __ovld __cnfn convert_char3_rtz(uint3); char3 __ovld __cnfn convert_char3_sat_rtz(uint3); char3 __ovld __cnfn convert_char3_rtp(uint3); char3 __ovld __cnfn convert_char3_sat_rtp(uint3); char3 __ovld __cnfn convert_char3_rtn(uint3); char3 __ovld __cnfn convert_char3_sat_rtn(uint3); char3 __ovld __cnfn convert_char3(uint3); char3 __ovld __cnfn convert_char3_sat(uint3); char3 __ovld __cnfn convert_char3_rte(long3); char3 __ovld __cnfn convert_char3_sat_rte(long3); char3 __ovld __cnfn convert_char3_rtz(long3); char3 __ovld __cnfn convert_char3_sat_rtz(long3); char3 __ovld __cnfn convert_char3_rtp(long3); char3 __ovld __cnfn convert_char3_sat_rtp(long3); char3 __ovld __cnfn convert_char3_rtn(long3); char3 __ovld __cnfn convert_char3_sat_rtn(long3); char3 __ovld __cnfn convert_char3(long3); char3 __ovld __cnfn convert_char3_sat(long3); char3 __ovld __cnfn convert_char3_rte(ulong3); char3 __ovld __cnfn convert_char3_sat_rte(ulong3); char3 __ovld __cnfn convert_char3_rtz(ulong3); char3 __ovld __cnfn convert_char3_sat_rtz(ulong3); char3 __ovld __cnfn convert_char3_rtp(ulong3); char3 __ovld __cnfn convert_char3_sat_rtp(ulong3); char3 __ovld __cnfn convert_char3_rtn(ulong3); char3 __ovld __cnfn convert_char3_sat_rtn(ulong3); char3 __ovld __cnfn convert_char3(ulong3); char3 __ovld __cnfn convert_char3_sat(ulong3); char3 __ovld __cnfn convert_char3_rte(float3); char3 __ovld __cnfn convert_char3_sat_rte(float3); char3 __ovld __cnfn 
convert_char3_rtz(float3); char3 __ovld __cnfn convert_char3_sat_rtz(float3); char3 __ovld __cnfn convert_char3_rtp(float3); char3 __ovld __cnfn convert_char3_sat_rtp(float3); char3 __ovld __cnfn convert_char3_rtn(float3); char3 __ovld __cnfn convert_char3_sat_rtn(float3); char3 __ovld __cnfn convert_char3(float3); char3 __ovld __cnfn convert_char3_sat(float3); uchar3 __ovld __cnfn convert_uchar3_rte(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3); uchar3 __ovld __cnfn convert_uchar3_rtz(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3); uchar3 __ovld __cnfn convert_uchar3_rtp(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3); uchar3 __ovld __cnfn convert_uchar3_rtn(char3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3); uchar3 __ovld __cnfn convert_uchar3(char3); uchar3 __ovld __cnfn convert_uchar3_sat(char3); uchar3 __ovld __cnfn convert_uchar3_rte(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3); uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3); uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3); uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3); uchar3 __ovld __cnfn convert_uchar3(uchar3); uchar3 __ovld __cnfn convert_uchar3_sat(uchar3); uchar3 __ovld __cnfn convert_uchar3_rte(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3); uchar3 __ovld __cnfn convert_uchar3_rtz(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3); uchar3 __ovld __cnfn convert_uchar3_rtp(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3); uchar3 __ovld __cnfn convert_uchar3_rtn(short3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3); uchar3 __ovld __cnfn convert_uchar3(short3); uchar3 __ovld __cnfn convert_uchar3_sat(short3); uchar3 __ovld __cnfn convert_uchar3_rte(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3); uchar3 __ovld __cnfn 
convert_uchar3_rtz(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3); uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3); uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3); uchar3 __ovld __cnfn convert_uchar3(ushort3); uchar3 __ovld __cnfn convert_uchar3_sat(ushort3); uchar3 __ovld __cnfn convert_uchar3_rte(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3); uchar3 __ovld __cnfn convert_uchar3_rtz(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3); uchar3 __ovld __cnfn convert_uchar3_rtp(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3); uchar3 __ovld __cnfn convert_uchar3_rtn(int3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3); uchar3 __ovld __cnfn convert_uchar3(int3); uchar3 __ovld __cnfn convert_uchar3_sat(int3); uchar3 __ovld __cnfn convert_uchar3_rte(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3); uchar3 __ovld __cnfn convert_uchar3_rtz(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3); uchar3 __ovld __cnfn convert_uchar3_rtp(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3); uchar3 __ovld __cnfn convert_uchar3_rtn(uint3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3); uchar3 __ovld __cnfn convert_uchar3(uint3); uchar3 __ovld __cnfn convert_uchar3_sat(uint3); uchar3 __ovld __cnfn convert_uchar3_rte(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3); uchar3 __ovld __cnfn convert_uchar3_rtz(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3); uchar3 __ovld __cnfn convert_uchar3_rtp(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3); uchar3 __ovld __cnfn convert_uchar3_rtn(long3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3); uchar3 __ovld __cnfn convert_uchar3(long3); uchar3 __ovld __cnfn convert_uchar3_sat(long3); uchar3 __ovld __cnfn convert_uchar3_rte(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3); uchar3 __ovld __cnfn 
convert_uchar3_rtz(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3); uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3); uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3); uchar3 __ovld __cnfn convert_uchar3(ulong3); uchar3 __ovld __cnfn convert_uchar3_sat(ulong3); uchar3 __ovld __cnfn convert_uchar3_rte(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3); uchar3 __ovld __cnfn convert_uchar3_rtz(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3); uchar3 __ovld __cnfn convert_uchar3_rtp(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3); uchar3 __ovld __cnfn convert_uchar3_rtn(float3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3); uchar3 __ovld __cnfn convert_uchar3(float3); uchar3 __ovld __cnfn convert_uchar3_sat(float3); short3 __ovld __cnfn convert_short3_rte(char3); short3 __ovld __cnfn convert_short3_sat_rte(char3); short3 __ovld __cnfn convert_short3_rtz(char3); short3 __ovld __cnfn convert_short3_sat_rtz(char3); short3 __ovld __cnfn convert_short3_rtp(char3); short3 __ovld __cnfn convert_short3_sat_rtp(char3); short3 __ovld __cnfn convert_short3_rtn(char3); short3 __ovld __cnfn convert_short3_sat_rtn(char3); short3 __ovld __cnfn convert_short3(char3); short3 __ovld __cnfn convert_short3_sat(char3); short3 __ovld __cnfn convert_short3_rte(uchar3); short3 __ovld __cnfn convert_short3_sat_rte(uchar3); short3 __ovld __cnfn convert_short3_rtz(uchar3); short3 __ovld __cnfn convert_short3_sat_rtz(uchar3); short3 __ovld __cnfn convert_short3_rtp(uchar3); short3 __ovld __cnfn convert_short3_sat_rtp(uchar3); short3 __ovld __cnfn convert_short3_rtn(uchar3); short3 __ovld __cnfn convert_short3_sat_rtn(uchar3); short3 __ovld __cnfn convert_short3(uchar3); short3 __ovld __cnfn convert_short3_sat(uchar3); short3 __ovld __cnfn convert_short3_rte(short3); short3 __ovld __cnfn convert_short3_sat_rte(short3); short3 __ovld __cnfn 
convert_short3_rtz(short3); short3 __ovld __cnfn convert_short3_sat_rtz(short3); short3 __ovld __cnfn convert_short3_rtp(short3); short3 __ovld __cnfn convert_short3_sat_rtp(short3); short3 __ovld __cnfn convert_short3_rtn(short3); short3 __ovld __cnfn convert_short3_sat_rtn(short3); short3 __ovld __cnfn convert_short3(short3); short3 __ovld __cnfn convert_short3_sat(short3); short3 __ovld __cnfn convert_short3_rte(ushort3); short3 __ovld __cnfn convert_short3_sat_rte(ushort3); short3 __ovld __cnfn convert_short3_rtz(ushort3); short3 __ovld __cnfn convert_short3_sat_rtz(ushort3); short3 __ovld __cnfn convert_short3_rtp(ushort3); short3 __ovld __cnfn convert_short3_sat_rtp(ushort3); short3 __ovld __cnfn convert_short3_rtn(ushort3); short3 __ovld __cnfn convert_short3_sat_rtn(ushort3); short3 __ovld __cnfn convert_short3(ushort3); short3 __ovld __cnfn convert_short3_sat(ushort3); short3 __ovld __cnfn convert_short3_rte(int3); short3 __ovld __cnfn convert_short3_sat_rte(int3); short3 __ovld __cnfn convert_short3_rtz(int3); short3 __ovld __cnfn convert_short3_sat_rtz(int3); short3 __ovld __cnfn convert_short3_rtp(int3); short3 __ovld __cnfn convert_short3_sat_rtp(int3); short3 __ovld __cnfn convert_short3_rtn(int3); short3 __ovld __cnfn convert_short3_sat_rtn(int3); short3 __ovld __cnfn convert_short3(int3); short3 __ovld __cnfn convert_short3_sat(int3); short3 __ovld __cnfn convert_short3_rte(uint3); short3 __ovld __cnfn convert_short3_sat_rte(uint3); short3 __ovld __cnfn convert_short3_rtz(uint3); short3 __ovld __cnfn convert_short3_sat_rtz(uint3); short3 __ovld __cnfn convert_short3_rtp(uint3); short3 __ovld __cnfn convert_short3_sat_rtp(uint3); short3 __ovld __cnfn convert_short3_rtn(uint3); short3 __ovld __cnfn convert_short3_sat_rtn(uint3); short3 __ovld __cnfn convert_short3(uint3); short3 __ovld __cnfn convert_short3_sat(uint3); short3 __ovld __cnfn convert_short3_rte(long3); short3 __ovld __cnfn convert_short3_sat_rte(long3); short3 __ovld __cnfn 
convert_short3_rtz(long3); short3 __ovld __cnfn convert_short3_sat_rtz(long3); short3 __ovld __cnfn convert_short3_rtp(long3); short3 __ovld __cnfn convert_short3_sat_rtp(long3); short3 __ovld __cnfn convert_short3_rtn(long3); short3 __ovld __cnfn convert_short3_sat_rtn(long3); short3 __ovld __cnfn convert_short3(long3); short3 __ovld __cnfn convert_short3_sat(long3); short3 __ovld __cnfn convert_short3_rte(ulong3); short3 __ovld __cnfn convert_short3_sat_rte(ulong3); short3 __ovld __cnfn convert_short3_rtz(ulong3); short3 __ovld __cnfn convert_short3_sat_rtz(ulong3); short3 __ovld __cnfn convert_short3_rtp(ulong3); short3 __ovld __cnfn convert_short3_sat_rtp(ulong3); short3 __ovld __cnfn convert_short3_rtn(ulong3); short3 __ovld __cnfn convert_short3_sat_rtn(ulong3); short3 __ovld __cnfn convert_short3(ulong3); short3 __ovld __cnfn convert_short3_sat(ulong3); short3 __ovld __cnfn convert_short3_rte(float3); short3 __ovld __cnfn convert_short3_sat_rte(float3); short3 __ovld __cnfn convert_short3_rtz(float3); short3 __ovld __cnfn convert_short3_sat_rtz(float3); short3 __ovld __cnfn convert_short3_rtp(float3); short3 __ovld __cnfn convert_short3_sat_rtp(float3); short3 __ovld __cnfn convert_short3_rtn(float3); short3 __ovld __cnfn convert_short3_sat_rtn(float3); short3 __ovld __cnfn convert_short3(float3); short3 __ovld __cnfn convert_short3_sat(float3); ushort3 __ovld __cnfn convert_ushort3_rte(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3); ushort3 __ovld __cnfn convert_ushort3_rtz(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3); ushort3 __ovld __cnfn convert_ushort3_rtp(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3); ushort3 __ovld __cnfn convert_ushort3_rtn(char3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3); ushort3 __ovld __cnfn convert_ushort3(char3); ushort3 __ovld __cnfn convert_ushort3_sat(char3); ushort3 __ovld __cnfn convert_ushort3_rte(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3); 
ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3); ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3); ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3); ushort3 __ovld __cnfn convert_ushort3(uchar3); ushort3 __ovld __cnfn convert_ushort3_sat(uchar3); ushort3 __ovld __cnfn convert_ushort3_rte(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3); ushort3 __ovld __cnfn convert_ushort3_rtz(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3); ushort3 __ovld __cnfn convert_ushort3_rtp(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3); ushort3 __ovld __cnfn convert_ushort3_rtn(short3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3); ushort3 __ovld __cnfn convert_ushort3(short3); ushort3 __ovld __cnfn convert_ushort3_sat(short3); ushort3 __ovld __cnfn convert_ushort3_rte(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3); ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3); ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3); ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3); ushort3 __ovld __cnfn convert_ushort3(ushort3); ushort3 __ovld __cnfn convert_ushort3_sat(ushort3); ushort3 __ovld __cnfn convert_ushort3_rte(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3); ushort3 __ovld __cnfn convert_ushort3_rtz(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3); ushort3 __ovld __cnfn convert_ushort3_rtp(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3); ushort3 __ovld __cnfn convert_ushort3_rtn(int3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3); ushort3 __ovld __cnfn convert_ushort3(int3); ushort3 __ovld __cnfn convert_ushort3_sat(int3); ushort3 __ovld __cnfn 
convert_ushort3_rte(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3); ushort3 __ovld __cnfn convert_ushort3_rtz(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3); ushort3 __ovld __cnfn convert_ushort3_rtp(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3); ushort3 __ovld __cnfn convert_ushort3_rtn(uint3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3); ushort3 __ovld __cnfn convert_ushort3(uint3); ushort3 __ovld __cnfn convert_ushort3_sat(uint3); ushort3 __ovld __cnfn convert_ushort3_rte(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3); ushort3 __ovld __cnfn convert_ushort3_rtz(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3); ushort3 __ovld __cnfn convert_ushort3_rtp(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3); ushort3 __ovld __cnfn convert_ushort3_rtn(long3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3); ushort3 __ovld __cnfn convert_ushort3(long3); ushort3 __ovld __cnfn convert_ushort3_sat(long3); ushort3 __ovld __cnfn convert_ushort3_rte(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3); ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3); ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3); ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3); ushort3 __ovld __cnfn convert_ushort3(ulong3); ushort3 __ovld __cnfn convert_ushort3_sat(ulong3); ushort3 __ovld __cnfn convert_ushort3_rte(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3); ushort3 __ovld __cnfn convert_ushort3_rtz(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3); ushort3 __ovld __cnfn convert_ushort3_rtp(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3); ushort3 __ovld __cnfn convert_ushort3_rtn(float3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3); ushort3 __ovld __cnfn convert_ushort3(float3); 
ushort3 __ovld __cnfn convert_ushort3_sat(float3); int3 __ovld __cnfn convert_int3_rte(char3); int3 __ovld __cnfn convert_int3_sat_rte(char3); int3 __ovld __cnfn convert_int3_rtz(char3); int3 __ovld __cnfn convert_int3_sat_rtz(char3); int3 __ovld __cnfn convert_int3_rtp(char3); int3 __ovld __cnfn convert_int3_sat_rtp(char3); int3 __ovld __cnfn convert_int3_rtn(char3); int3 __ovld __cnfn convert_int3_sat_rtn(char3); int3 __ovld __cnfn convert_int3(char3); int3 __ovld __cnfn convert_int3_sat(char3); int3 __ovld __cnfn convert_int3_rte(uchar3); int3 __ovld __cnfn convert_int3_sat_rte(uchar3); int3 __ovld __cnfn convert_int3_rtz(uchar3); int3 __ovld __cnfn convert_int3_sat_rtz(uchar3); int3 __ovld __cnfn convert_int3_rtp(uchar3); int3 __ovld __cnfn convert_int3_sat_rtp(uchar3); int3 __ovld __cnfn convert_int3_rtn(uchar3); int3 __ovld __cnfn convert_int3_sat_rtn(uchar3); int3 __ovld __cnfn convert_int3(uchar3); int3 __ovld __cnfn convert_int3_sat(uchar3); int3 __ovld __cnfn convert_int3_rte(short3); int3 __ovld __cnfn convert_int3_sat_rte(short3); int3 __ovld __cnfn convert_int3_rtz(short3); int3 __ovld __cnfn convert_int3_sat_rtz(short3); int3 __ovld __cnfn convert_int3_rtp(short3); int3 __ovld __cnfn convert_int3_sat_rtp(short3); int3 __ovld __cnfn convert_int3_rtn(short3); int3 __ovld __cnfn convert_int3_sat_rtn(short3); int3 __ovld __cnfn convert_int3(short3); int3 __ovld __cnfn convert_int3_sat(short3); int3 __ovld __cnfn convert_int3_rte(ushort3); int3 __ovld __cnfn convert_int3_sat_rte(ushort3); int3 __ovld __cnfn convert_int3_rtz(ushort3); int3 __ovld __cnfn convert_int3_sat_rtz(ushort3); int3 __ovld __cnfn convert_int3_rtp(ushort3); int3 __ovld __cnfn convert_int3_sat_rtp(ushort3); int3 __ovld __cnfn convert_int3_rtn(ushort3); int3 __ovld __cnfn convert_int3_sat_rtn(ushort3); int3 __ovld __cnfn convert_int3(ushort3); int3 __ovld __cnfn convert_int3_sat(ushort3); int3 __ovld __cnfn convert_int3_rte(int3); int3 __ovld __cnfn convert_int3_sat_rte(int3); int3 
__ovld __cnfn convert_int3_rtz(int3); int3 __ovld __cnfn convert_int3_sat_rtz(int3); int3 __ovld __cnfn convert_int3_rtp(int3); int3 __ovld __cnfn convert_int3_sat_rtp(int3); int3 __ovld __cnfn convert_int3_rtn(int3); int3 __ovld __cnfn convert_int3_sat_rtn(int3); int3 __ovld __cnfn convert_int3(int3); int3 __ovld __cnfn convert_int3_sat(int3); int3 __ovld __cnfn convert_int3_rte(uint3); int3 __ovld __cnfn convert_int3_sat_rte(uint3); int3 __ovld __cnfn convert_int3_rtz(uint3); int3 __ovld __cnfn convert_int3_sat_rtz(uint3); int3 __ovld __cnfn convert_int3_rtp(uint3); int3 __ovld __cnfn convert_int3_sat_rtp(uint3); int3 __ovld __cnfn convert_int3_rtn(uint3); int3 __ovld __cnfn convert_int3_sat_rtn(uint3); int3 __ovld __cnfn convert_int3(uint3); int3 __ovld __cnfn convert_int3_sat(uint3); int3 __ovld __cnfn convert_int3_rte(long3); int3 __ovld __cnfn convert_int3_sat_rte(long3); int3 __ovld __cnfn convert_int3_rtz(long3); int3 __ovld __cnfn convert_int3_sat_rtz(long3); int3 __ovld __cnfn convert_int3_rtp(long3); int3 __ovld __cnfn convert_int3_sat_rtp(long3); int3 __ovld __cnfn convert_int3_rtn(long3); int3 __ovld __cnfn convert_int3_sat_rtn(long3); int3 __ovld __cnfn convert_int3(long3); int3 __ovld __cnfn convert_int3_sat(long3); int3 __ovld __cnfn convert_int3_rte(ulong3); int3 __ovld __cnfn convert_int3_sat_rte(ulong3); int3 __ovld __cnfn convert_int3_rtz(ulong3); int3 __ovld __cnfn convert_int3_sat_rtz(ulong3); int3 __ovld __cnfn convert_int3_rtp(ulong3); int3 __ovld __cnfn convert_int3_sat_rtp(ulong3); int3 __ovld __cnfn convert_int3_rtn(ulong3); int3 __ovld __cnfn convert_int3_sat_rtn(ulong3); int3 __ovld __cnfn convert_int3(ulong3); int3 __ovld __cnfn convert_int3_sat(ulong3); int3 __ovld __cnfn convert_int3_rte(float3); int3 __ovld __cnfn convert_int3_sat_rte(float3); int3 __ovld __cnfn convert_int3_rtz(float3); int3 __ovld __cnfn convert_int3_sat_rtz(float3); int3 __ovld __cnfn convert_int3_rtp(float3); int3 __ovld __cnfn convert_int3_sat_rtp(float3); int3 
__ovld __cnfn convert_int3_rtn(float3); int3 __ovld __cnfn convert_int3_sat_rtn(float3); int3 __ovld __cnfn convert_int3(float3); int3 __ovld __cnfn convert_int3_sat(float3); uint3 __ovld __cnfn convert_uint3_rte(char3); uint3 __ovld __cnfn convert_uint3_sat_rte(char3); uint3 __ovld __cnfn convert_uint3_rtz(char3); uint3 __ovld __cnfn convert_uint3_sat_rtz(char3); uint3 __ovld __cnfn convert_uint3_rtp(char3); uint3 __ovld __cnfn convert_uint3_sat_rtp(char3); uint3 __ovld __cnfn convert_uint3_rtn(char3); uint3 __ovld __cnfn convert_uint3_sat_rtn(char3); uint3 __ovld __cnfn convert_uint3(char3); uint3 __ovld __cnfn convert_uint3_sat(char3); uint3 __ovld __cnfn convert_uint3_rte(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3); uint3 __ovld __cnfn convert_uint3_rtz(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3); uint3 __ovld __cnfn convert_uint3_rtp(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3); uint3 __ovld __cnfn convert_uint3_rtn(uchar3); uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3); uint3 __ovld __cnfn convert_uint3(uchar3); uint3 __ovld __cnfn convert_uint3_sat(uchar3); uint3 __ovld __cnfn convert_uint3_rte(short3); uint3 __ovld __cnfn convert_uint3_sat_rte(short3); uint3 __ovld __cnfn convert_uint3_rtz(short3); uint3 __ovld __cnfn convert_uint3_sat_rtz(short3); uint3 __ovld __cnfn convert_uint3_rtp(short3); uint3 __ovld __cnfn convert_uint3_sat_rtp(short3); uint3 __ovld __cnfn convert_uint3_rtn(short3); uint3 __ovld __cnfn convert_uint3_sat_rtn(short3); uint3 __ovld __cnfn convert_uint3(short3); uint3 __ovld __cnfn convert_uint3_sat(short3); uint3 __ovld __cnfn convert_uint3_rte(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3); uint3 __ovld __cnfn convert_uint3_rtz(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3); uint3 __ovld __cnfn convert_uint3_rtp(ushort3); uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3); uint3 __ovld __cnfn convert_uint3_rtn(ushort3); uint3 __ovld __cnfn 
convert_uint3_sat_rtn(ushort3); uint3 __ovld __cnfn convert_uint3(ushort3); uint3 __ovld __cnfn convert_uint3_sat(ushort3); uint3 __ovld __cnfn convert_uint3_rte(int3); uint3 __ovld __cnfn convert_uint3_sat_rte(int3); uint3 __ovld __cnfn convert_uint3_rtz(int3); uint3 __ovld __cnfn convert_uint3_sat_rtz(int3); uint3 __ovld __cnfn convert_uint3_rtp(int3); uint3 __ovld __cnfn convert_uint3_sat_rtp(int3); uint3 __ovld __cnfn convert_uint3_rtn(int3); uint3 __ovld __cnfn convert_uint3_sat_rtn(int3); uint3 __ovld __cnfn convert_uint3(int3); uint3 __ovld __cnfn convert_uint3_sat(int3); uint3 __ovld __cnfn convert_uint3_rte(uint3); uint3 __ovld __cnfn convert_uint3_sat_rte(uint3); uint3 __ovld __cnfn convert_uint3_rtz(uint3); uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3); uint3 __ovld __cnfn convert_uint3_rtp(uint3); uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3); uint3 __ovld __cnfn convert_uint3_rtn(uint3); uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3); uint3 __ovld __cnfn convert_uint3(uint3); uint3 __ovld __cnfn convert_uint3_sat(uint3); uint3 __ovld __cnfn convert_uint3_rte(long3); uint3 __ovld __cnfn convert_uint3_sat_rte(long3); uint3 __ovld __cnfn convert_uint3_rtz(long3); uint3 __ovld __cnfn convert_uint3_sat_rtz(long3); uint3 __ovld __cnfn convert_uint3_rtp(long3); uint3 __ovld __cnfn convert_uint3_sat_rtp(long3); uint3 __ovld __cnfn convert_uint3_rtn(long3); uint3 __ovld __cnfn convert_uint3_sat_rtn(long3); uint3 __ovld __cnfn convert_uint3(long3); uint3 __ovld __cnfn convert_uint3_sat(long3); uint3 __ovld __cnfn convert_uint3_rte(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3); uint3 __ovld __cnfn convert_uint3_rtz(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3); uint3 __ovld __cnfn convert_uint3_rtp(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3); uint3 __ovld __cnfn convert_uint3_rtn(ulong3); uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3); uint3 __ovld __cnfn convert_uint3(ulong3); uint3 __ovld __cnfn 
convert_uint3_sat(ulong3); uint3 __ovld __cnfn convert_uint3_rte(float3); uint3 __ovld __cnfn convert_uint3_sat_rte(float3); uint3 __ovld __cnfn convert_uint3_rtz(float3); uint3 __ovld __cnfn convert_uint3_sat_rtz(float3); uint3 __ovld __cnfn convert_uint3_rtp(float3); uint3 __ovld __cnfn convert_uint3_sat_rtp(float3); uint3 __ovld __cnfn convert_uint3_rtn(float3); uint3 __ovld __cnfn convert_uint3_sat_rtn(float3); uint3 __ovld __cnfn convert_uint3(float3); uint3 __ovld __cnfn convert_uint3_sat(float3); long3 __ovld __cnfn convert_long3_rte(char3); long3 __ovld __cnfn convert_long3_sat_rte(char3); long3 __ovld __cnfn convert_long3_rtz(char3); long3 __ovld __cnfn convert_long3_sat_rtz(char3); long3 __ovld __cnfn convert_long3_rtp(char3); long3 __ovld __cnfn convert_long3_sat_rtp(char3); long3 __ovld __cnfn convert_long3_rtn(char3); long3 __ovld __cnfn convert_long3_sat_rtn(char3); long3 __ovld __cnfn convert_long3(char3); long3 __ovld __cnfn convert_long3_sat(char3); long3 __ovld __cnfn convert_long3_rte(uchar3); long3 __ovld __cnfn convert_long3_sat_rte(uchar3); long3 __ovld __cnfn convert_long3_rtz(uchar3); long3 __ovld __cnfn convert_long3_sat_rtz(uchar3); long3 __ovld __cnfn convert_long3_rtp(uchar3); long3 __ovld __cnfn convert_long3_sat_rtp(uchar3); long3 __ovld __cnfn convert_long3_rtn(uchar3); long3 __ovld __cnfn convert_long3_sat_rtn(uchar3); long3 __ovld __cnfn convert_long3(uchar3); long3 __ovld __cnfn convert_long3_sat(uchar3); long3 __ovld __cnfn convert_long3_rte(short3); long3 __ovld __cnfn convert_long3_sat_rte(short3); long3 __ovld __cnfn convert_long3_rtz(short3); long3 __ovld __cnfn convert_long3_sat_rtz(short3); long3 __ovld __cnfn convert_long3_rtp(short3); long3 __ovld __cnfn convert_long3_sat_rtp(short3); long3 __ovld __cnfn convert_long3_rtn(short3); long3 __ovld __cnfn convert_long3_sat_rtn(short3); long3 __ovld __cnfn convert_long3(short3); long3 __ovld __cnfn convert_long3_sat(short3); long3 __ovld __cnfn convert_long3_rte(ushort3); long3 
__ovld __cnfn convert_long3_sat_rte(ushort3); long3 __ovld __cnfn convert_long3_rtz(ushort3); long3 __ovld __cnfn convert_long3_sat_rtz(ushort3); long3 __ovld __cnfn convert_long3_rtp(ushort3); long3 __ovld __cnfn convert_long3_sat_rtp(ushort3); long3 __ovld __cnfn convert_long3_rtn(ushort3); long3 __ovld __cnfn convert_long3_sat_rtn(ushort3); long3 __ovld __cnfn convert_long3(ushort3); long3 __ovld __cnfn convert_long3_sat(ushort3); long3 __ovld __cnfn convert_long3_rte(int3); long3 __ovld __cnfn convert_long3_sat_rte(int3); long3 __ovld __cnfn convert_long3_rtz(int3); long3 __ovld __cnfn convert_long3_sat_rtz(int3); long3 __ovld __cnfn convert_long3_rtp(int3); long3 __ovld __cnfn convert_long3_sat_rtp(int3); long3 __ovld __cnfn convert_long3_rtn(int3); long3 __ovld __cnfn convert_long3_sat_rtn(int3); long3 __ovld __cnfn convert_long3(int3); long3 __ovld __cnfn convert_long3_sat(int3); long3 __ovld __cnfn convert_long3_rte(uint3); long3 __ovld __cnfn convert_long3_sat_rte(uint3); long3 __ovld __cnfn convert_long3_rtz(uint3); long3 __ovld __cnfn convert_long3_sat_rtz(uint3); long3 __ovld __cnfn convert_long3_rtp(uint3); long3 __ovld __cnfn convert_long3_sat_rtp(uint3); long3 __ovld __cnfn convert_long3_rtn(uint3); long3 __ovld __cnfn convert_long3_sat_rtn(uint3); long3 __ovld __cnfn convert_long3(uint3); long3 __ovld __cnfn convert_long3_sat(uint3); long3 __ovld __cnfn convert_long3_rte(long3); long3 __ovld __cnfn convert_long3_sat_rte(long3); long3 __ovld __cnfn convert_long3_rtz(long3); long3 __ovld __cnfn convert_long3_sat_rtz(long3); long3 __ovld __cnfn convert_long3_rtp(long3); long3 __ovld __cnfn convert_long3_sat_rtp(long3); long3 __ovld __cnfn convert_long3_rtn(long3); long3 __ovld __cnfn convert_long3_sat_rtn(long3); long3 __ovld __cnfn convert_long3(long3); long3 __ovld __cnfn convert_long3_sat(long3); long3 __ovld __cnfn convert_long3_rte(ulong3); long3 __ovld __cnfn convert_long3_sat_rte(ulong3); long3 __ovld __cnfn convert_long3_rtz(ulong3); long3 
__ovld __cnfn convert_long3_sat_rtz(ulong3); long3 __ovld __cnfn convert_long3_rtp(ulong3); long3 __ovld __cnfn convert_long3_sat_rtp(ulong3); long3 __ovld __cnfn convert_long3_rtn(ulong3); long3 __ovld __cnfn convert_long3_sat_rtn(ulong3); long3 __ovld __cnfn convert_long3(ulong3); long3 __ovld __cnfn convert_long3_sat(ulong3); long3 __ovld __cnfn convert_long3_rte(float3); long3 __ovld __cnfn convert_long3_sat_rte(float3); long3 __ovld __cnfn convert_long3_rtz(float3); long3 __ovld __cnfn convert_long3_sat_rtz(float3); long3 __ovld __cnfn convert_long3_rtp(float3); long3 __ovld __cnfn convert_long3_sat_rtp(float3); long3 __ovld __cnfn convert_long3_rtn(float3); long3 __ovld __cnfn convert_long3_sat_rtn(float3); long3 __ovld __cnfn convert_long3(float3); long3 __ovld __cnfn convert_long3_sat(float3); ulong3 __ovld __cnfn convert_ulong3_rte(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3); ulong3 __ovld __cnfn convert_ulong3_rtz(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3); ulong3 __ovld __cnfn convert_ulong3_rtp(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3); ulong3 __ovld __cnfn convert_ulong3_rtn(char3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3); ulong3 __ovld __cnfn convert_ulong3(char3); ulong3 __ovld __cnfn convert_ulong3_sat(char3); ulong3 __ovld __cnfn convert_ulong3_rte(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3); ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3); ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3); ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3); ulong3 __ovld __cnfn convert_ulong3(uchar3); ulong3 __ovld __cnfn convert_ulong3_sat(uchar3); ulong3 __ovld __cnfn convert_ulong3_rte(short3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3); ulong3 __ovld __cnfn convert_ulong3_rtz(short3); ulong3 __ovld __cnfn 
convert_ulong3_sat_rtz(short3); ulong3 __ovld __cnfn convert_ulong3_rtp(short3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3); ulong3 __ovld __cnfn convert_ulong3_rtn(short3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3); ulong3 __ovld __cnfn convert_ulong3(short3); ulong3 __ovld __cnfn convert_ulong3_sat(short3); ulong3 __ovld __cnfn convert_ulong3_rte(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3); ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3); ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3); ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3); ulong3 __ovld __cnfn convert_ulong3(ushort3); ulong3 __ovld __cnfn convert_ulong3_sat(ushort3); ulong3 __ovld __cnfn convert_ulong3_rte(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3); ulong3 __ovld __cnfn convert_ulong3_rtz(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3); ulong3 __ovld __cnfn convert_ulong3_rtp(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3); ulong3 __ovld __cnfn convert_ulong3_rtn(int3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3); ulong3 __ovld __cnfn convert_ulong3(int3); ulong3 __ovld __cnfn convert_ulong3_sat(int3); ulong3 __ovld __cnfn convert_ulong3_rte(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3); ulong3 __ovld __cnfn convert_ulong3_rtz(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3); ulong3 __ovld __cnfn convert_ulong3_rtp(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3); ulong3 __ovld __cnfn convert_ulong3_rtn(uint3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3); ulong3 __ovld __cnfn convert_ulong3(uint3); ulong3 __ovld __cnfn convert_ulong3_sat(uint3); ulong3 __ovld __cnfn convert_ulong3_rte(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3); ulong3 __ovld __cnfn convert_ulong3_rtz(long3); ulong3 __ovld __cnfn 
convert_ulong3_sat_rtz(long3); ulong3 __ovld __cnfn convert_ulong3_rtp(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3); ulong3 __ovld __cnfn convert_ulong3_rtn(long3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3); ulong3 __ovld __cnfn convert_ulong3(long3); ulong3 __ovld __cnfn convert_ulong3_sat(long3); ulong3 __ovld __cnfn convert_ulong3_rte(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3); ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3); ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3); ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3); ulong3 __ovld __cnfn convert_ulong3(ulong3); ulong3 __ovld __cnfn convert_ulong3_sat(ulong3); ulong3 __ovld __cnfn convert_ulong3_rte(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3); ulong3 __ovld __cnfn convert_ulong3_rtz(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3); ulong3 __ovld __cnfn convert_ulong3_rtp(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3); ulong3 __ovld __cnfn convert_ulong3_rtn(float3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3); ulong3 __ovld __cnfn convert_ulong3(float3); ulong3 __ovld __cnfn convert_ulong3_sat(float3); float3 __ovld __cnfn convert_float3_rte(char3); float3 __ovld __cnfn convert_float3_rtz(char3); float3 __ovld __cnfn convert_float3_rtp(char3); float3 __ovld __cnfn convert_float3_rtn(char3); float3 __ovld __cnfn convert_float3(char3); float3 __ovld __cnfn convert_float3_rte(uchar3); float3 __ovld __cnfn convert_float3_rtz(uchar3); float3 __ovld __cnfn convert_float3_rtp(uchar3); float3 __ovld __cnfn convert_float3_rtn(uchar3); float3 __ovld __cnfn convert_float3(uchar3); float3 __ovld __cnfn convert_float3_rte(short3); float3 __ovld __cnfn convert_float3_rtz(short3); float3 __ovld __cnfn convert_float3_rtp(short3); float3 __ovld __cnfn convert_float3_rtn(short3); 
float3 __ovld __cnfn convert_float3(short3); float3 __ovld __cnfn convert_float3_rte(ushort3); float3 __ovld __cnfn convert_float3_rtz(ushort3); float3 __ovld __cnfn convert_float3_rtp(ushort3); float3 __ovld __cnfn convert_float3_rtn(ushort3); float3 __ovld __cnfn convert_float3(ushort3); float3 __ovld __cnfn convert_float3_rte(int3); float3 __ovld __cnfn convert_float3_rtz(int3); float3 __ovld __cnfn convert_float3_rtp(int3); float3 __ovld __cnfn convert_float3_rtn(int3); float3 __ovld __cnfn convert_float3(int3); float3 __ovld __cnfn convert_float3_rte(uint3); float3 __ovld __cnfn convert_float3_rtz(uint3); float3 __ovld __cnfn convert_float3_rtp(uint3); float3 __ovld __cnfn convert_float3_rtn(uint3); float3 __ovld __cnfn convert_float3(uint3); float3 __ovld __cnfn convert_float3_rte(long3); float3 __ovld __cnfn convert_float3_rtz(long3); float3 __ovld __cnfn convert_float3_rtp(long3); float3 __ovld __cnfn convert_float3_rtn(long3); float3 __ovld __cnfn convert_float3(long3); float3 __ovld __cnfn convert_float3_rte(ulong3); float3 __ovld __cnfn convert_float3_rtz(ulong3); float3 __ovld __cnfn convert_float3_rtp(ulong3); float3 __ovld __cnfn convert_float3_rtn(ulong3); float3 __ovld __cnfn convert_float3(ulong3); float3 __ovld __cnfn convert_float3_rte(float3); float3 __ovld __cnfn convert_float3_rtz(float3); float3 __ovld __cnfn convert_float3_rtp(float3); float3 __ovld __cnfn convert_float3_rtn(float3); float3 __ovld __cnfn convert_float3(float3); char4 __ovld __cnfn convert_char4_rte(char4); char4 __ovld __cnfn convert_char4_sat_rte(char4); char4 __ovld __cnfn convert_char4_rtz(char4); char4 __ovld __cnfn convert_char4_sat_rtz(char4); char4 __ovld __cnfn convert_char4_rtp(char4); char4 __ovld __cnfn convert_char4_sat_rtp(char4); char4 __ovld __cnfn convert_char4_rtn(char4); char4 __ovld __cnfn convert_char4_sat_rtn(char4); char4 __ovld __cnfn convert_char4(char4); char4 __ovld __cnfn convert_char4_sat(char4); char4 __ovld __cnfn convert_char4_rte(uchar4); 
char4 __ovld __cnfn convert_char4_sat_rte(uchar4); char4 __ovld __cnfn convert_char4_rtz(uchar4); char4 __ovld __cnfn convert_char4_sat_rtz(uchar4); char4 __ovld __cnfn convert_char4_rtp(uchar4); char4 __ovld __cnfn convert_char4_sat_rtp(uchar4); char4 __ovld __cnfn convert_char4_rtn(uchar4); char4 __ovld __cnfn convert_char4_sat_rtn(uchar4); char4 __ovld __cnfn convert_char4(uchar4); char4 __ovld __cnfn convert_char4_sat(uchar4); char4 __ovld __cnfn convert_char4_rte(short4); char4 __ovld __cnfn convert_char4_sat_rte(short4); char4 __ovld __cnfn convert_char4_rtz(short4); char4 __ovld __cnfn convert_char4_sat_rtz(short4); char4 __ovld __cnfn convert_char4_rtp(short4); char4 __ovld __cnfn convert_char4_sat_rtp(short4); char4 __ovld __cnfn convert_char4_rtn(short4); char4 __ovld __cnfn convert_char4_sat_rtn(short4); char4 __ovld __cnfn convert_char4(short4); char4 __ovld __cnfn convert_char4_sat(short4); char4 __ovld __cnfn convert_char4_rte(ushort4); char4 __ovld __cnfn convert_char4_sat_rte(ushort4); char4 __ovld __cnfn convert_char4_rtz(ushort4); char4 __ovld __cnfn convert_char4_sat_rtz(ushort4); char4 __ovld __cnfn convert_char4_rtp(ushort4); char4 __ovld __cnfn convert_char4_sat_rtp(ushort4); char4 __ovld __cnfn convert_char4_rtn(ushort4); char4 __ovld __cnfn convert_char4_sat_rtn(ushort4); char4 __ovld __cnfn convert_char4(ushort4); char4 __ovld __cnfn convert_char4_sat(ushort4); char4 __ovld __cnfn convert_char4_rte(int4); char4 __ovld __cnfn convert_char4_sat_rte(int4); char4 __ovld __cnfn convert_char4_rtz(int4); char4 __ovld __cnfn convert_char4_sat_rtz(int4); char4 __ovld __cnfn convert_char4_rtp(int4); char4 __ovld __cnfn convert_char4_sat_rtp(int4); char4 __ovld __cnfn convert_char4_rtn(int4); char4 __ovld __cnfn convert_char4_sat_rtn(int4); char4 __ovld __cnfn convert_char4(int4); char4 __ovld __cnfn convert_char4_sat(int4); char4 __ovld __cnfn convert_char4_rte(uint4); char4 __ovld __cnfn convert_char4_sat_rte(uint4); char4 __ovld __cnfn 
convert_char4_rtz(uint4); char4 __ovld __cnfn convert_char4_sat_rtz(uint4); char4 __ovld __cnfn convert_char4_rtp(uint4); char4 __ovld __cnfn convert_char4_sat_rtp(uint4); char4 __ovld __cnfn convert_char4_rtn(uint4); char4 __ovld __cnfn convert_char4_sat_rtn(uint4); char4 __ovld __cnfn convert_char4(uint4); char4 __ovld __cnfn convert_char4_sat(uint4); char4 __ovld __cnfn convert_char4_rte(long4); char4 __ovld __cnfn convert_char4_sat_rte(long4); char4 __ovld __cnfn convert_char4_rtz(long4); char4 __ovld __cnfn convert_char4_sat_rtz(long4); char4 __ovld __cnfn convert_char4_rtp(long4); char4 __ovld __cnfn convert_char4_sat_rtp(long4); char4 __ovld __cnfn convert_char4_rtn(long4); char4 __ovld __cnfn convert_char4_sat_rtn(long4); char4 __ovld __cnfn convert_char4(long4); char4 __ovld __cnfn convert_char4_sat(long4); char4 __ovld __cnfn convert_char4_rte(ulong4); char4 __ovld __cnfn convert_char4_sat_rte(ulong4); char4 __ovld __cnfn convert_char4_rtz(ulong4); char4 __ovld __cnfn convert_char4_sat_rtz(ulong4); char4 __ovld __cnfn convert_char4_rtp(ulong4); char4 __ovld __cnfn convert_char4_sat_rtp(ulong4); char4 __ovld __cnfn convert_char4_rtn(ulong4); char4 __ovld __cnfn convert_char4_sat_rtn(ulong4); char4 __ovld __cnfn convert_char4(ulong4); char4 __ovld __cnfn convert_char4_sat(ulong4); char4 __ovld __cnfn convert_char4_rte(float4); char4 __ovld __cnfn convert_char4_sat_rte(float4); char4 __ovld __cnfn convert_char4_rtz(float4); char4 __ovld __cnfn convert_char4_sat_rtz(float4); char4 __ovld __cnfn convert_char4_rtp(float4); char4 __ovld __cnfn convert_char4_sat_rtp(float4); char4 __ovld __cnfn convert_char4_rtn(float4); char4 __ovld __cnfn convert_char4_sat_rtn(float4); char4 __ovld __cnfn convert_char4(float4); char4 __ovld __cnfn convert_char4_sat(float4); uchar4 __ovld __cnfn convert_uchar4_rte(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4); uchar4 __ovld __cnfn convert_uchar4_rtz(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4); uchar4 
__ovld __cnfn convert_uchar4_rtp(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4); uchar4 __ovld __cnfn convert_uchar4_rtn(char4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4); uchar4 __ovld __cnfn convert_uchar4(char4); uchar4 __ovld __cnfn convert_uchar4_sat(char4); uchar4 __ovld __cnfn convert_uchar4_rte(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4); uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4); uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4); uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4); uchar4 __ovld __cnfn convert_uchar4(uchar4); uchar4 __ovld __cnfn convert_uchar4_sat(uchar4); uchar4 __ovld __cnfn convert_uchar4_rte(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4); uchar4 __ovld __cnfn convert_uchar4_rtz(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4); uchar4 __ovld __cnfn convert_uchar4_rtp(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4); uchar4 __ovld __cnfn convert_uchar4_rtn(short4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4); uchar4 __ovld __cnfn convert_uchar4(short4); uchar4 __ovld __cnfn convert_uchar4_sat(short4); uchar4 __ovld __cnfn convert_uchar4_rte(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4); uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4); uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4); uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4); uchar4 __ovld __cnfn convert_uchar4(ushort4); uchar4 __ovld __cnfn convert_uchar4_sat(ushort4); uchar4 __ovld __cnfn convert_uchar4_rte(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4); uchar4 __ovld __cnfn convert_uchar4_rtz(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4); 
uchar4 __ovld __cnfn convert_uchar4_rtp(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4); uchar4 __ovld __cnfn convert_uchar4_rtn(int4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4); uchar4 __ovld __cnfn convert_uchar4(int4); uchar4 __ovld __cnfn convert_uchar4_sat(int4); uchar4 __ovld __cnfn convert_uchar4_rte(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4); uchar4 __ovld __cnfn convert_uchar4_rtz(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4); uchar4 __ovld __cnfn convert_uchar4_rtp(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4); uchar4 __ovld __cnfn convert_uchar4_rtn(uint4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4); uchar4 __ovld __cnfn convert_uchar4(uint4); uchar4 __ovld __cnfn convert_uchar4_sat(uint4); uchar4 __ovld __cnfn convert_uchar4_rte(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4); uchar4 __ovld __cnfn convert_uchar4_rtz(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4); uchar4 __ovld __cnfn convert_uchar4_rtp(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4); uchar4 __ovld __cnfn convert_uchar4_rtn(long4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4); uchar4 __ovld __cnfn convert_uchar4(long4); uchar4 __ovld __cnfn convert_uchar4_sat(long4); uchar4 __ovld __cnfn convert_uchar4_rte(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4); uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4); uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4); uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4); uchar4 __ovld __cnfn convert_uchar4(ulong4); uchar4 __ovld __cnfn convert_uchar4_sat(ulong4); uchar4 __ovld __cnfn convert_uchar4_rte(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4); uchar4 __ovld __cnfn convert_uchar4_rtz(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4); uchar4 __ovld __cnfn 
convert_uchar4_rtp(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4); uchar4 __ovld __cnfn convert_uchar4_rtn(float4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4); uchar4 __ovld __cnfn convert_uchar4(float4); uchar4 __ovld __cnfn convert_uchar4_sat(float4); short4 __ovld __cnfn convert_short4_rte(char4); short4 __ovld __cnfn convert_short4_sat_rte(char4); short4 __ovld __cnfn convert_short4_rtz(char4); short4 __ovld __cnfn convert_short4_sat_rtz(char4); short4 __ovld __cnfn convert_short4_rtp(char4); short4 __ovld __cnfn convert_short4_sat_rtp(char4); short4 __ovld __cnfn convert_short4_rtn(char4); short4 __ovld __cnfn convert_short4_sat_rtn(char4); short4 __ovld __cnfn convert_short4(char4); short4 __ovld __cnfn convert_short4_sat(char4); short4 __ovld __cnfn convert_short4_rte(uchar4); short4 __ovld __cnfn convert_short4_sat_rte(uchar4); short4 __ovld __cnfn convert_short4_rtz(uchar4); short4 __ovld __cnfn convert_short4_sat_rtz(uchar4); short4 __ovld __cnfn convert_short4_rtp(uchar4); short4 __ovld __cnfn convert_short4_sat_rtp(uchar4); short4 __ovld __cnfn convert_short4_rtn(uchar4); short4 __ovld __cnfn convert_short4_sat_rtn(uchar4); short4 __ovld __cnfn convert_short4(uchar4); short4 __ovld __cnfn convert_short4_sat(uchar4); short4 __ovld __cnfn convert_short4_rte(short4); short4 __ovld __cnfn convert_short4_sat_rte(short4); short4 __ovld __cnfn convert_short4_rtz(short4); short4 __ovld __cnfn convert_short4_sat_rtz(short4); short4 __ovld __cnfn convert_short4_rtp(short4); short4 __ovld __cnfn convert_short4_sat_rtp(short4); short4 __ovld __cnfn convert_short4_rtn(short4); short4 __ovld __cnfn convert_short4_sat_rtn(short4); short4 __ovld __cnfn convert_short4(short4); short4 __ovld __cnfn convert_short4_sat(short4); short4 __ovld __cnfn convert_short4_rte(ushort4); short4 __ovld __cnfn convert_short4_sat_rte(ushort4); short4 __ovld __cnfn convert_short4_rtz(ushort4); short4 __ovld __cnfn convert_short4_sat_rtz(ushort4); short4 __ovld 
__cnfn convert_short4_rtp(ushort4); short4 __ovld __cnfn convert_short4_sat_rtp(ushort4); short4 __ovld __cnfn convert_short4_rtn(ushort4); short4 __ovld __cnfn convert_short4_sat_rtn(ushort4); short4 __ovld __cnfn convert_short4(ushort4); short4 __ovld __cnfn convert_short4_sat(ushort4); short4 __ovld __cnfn convert_short4_rte(int4); short4 __ovld __cnfn convert_short4_sat_rte(int4); short4 __ovld __cnfn convert_short4_rtz(int4); short4 __ovld __cnfn convert_short4_sat_rtz(int4); short4 __ovld __cnfn convert_short4_rtp(int4); short4 __ovld __cnfn convert_short4_sat_rtp(int4); short4 __ovld __cnfn convert_short4_rtn(int4); short4 __ovld __cnfn convert_short4_sat_rtn(int4); short4 __ovld __cnfn convert_short4(int4); short4 __ovld __cnfn convert_short4_sat(int4); short4 __ovld __cnfn convert_short4_rte(uint4); short4 __ovld __cnfn convert_short4_sat_rte(uint4); short4 __ovld __cnfn convert_short4_rtz(uint4); short4 __ovld __cnfn convert_short4_sat_rtz(uint4); short4 __ovld __cnfn convert_short4_rtp(uint4); short4 __ovld __cnfn convert_short4_sat_rtp(uint4); short4 __ovld __cnfn convert_short4_rtn(uint4); short4 __ovld __cnfn convert_short4_sat_rtn(uint4); short4 __ovld __cnfn convert_short4(uint4); short4 __ovld __cnfn convert_short4_sat(uint4); short4 __ovld __cnfn convert_short4_rte(long4); short4 __ovld __cnfn convert_short4_sat_rte(long4); short4 __ovld __cnfn convert_short4_rtz(long4); short4 __ovld __cnfn convert_short4_sat_rtz(long4); short4 __ovld __cnfn convert_short4_rtp(long4); short4 __ovld __cnfn convert_short4_sat_rtp(long4); short4 __ovld __cnfn convert_short4_rtn(long4); short4 __ovld __cnfn convert_short4_sat_rtn(long4); short4 __ovld __cnfn convert_short4(long4); short4 __ovld __cnfn convert_short4_sat(long4); short4 __ovld __cnfn convert_short4_rte(ulong4); short4 __ovld __cnfn convert_short4_sat_rte(ulong4); short4 __ovld __cnfn convert_short4_rtz(ulong4); short4 __ovld __cnfn convert_short4_sat_rtz(ulong4); short4 __ovld __cnfn 
convert_short4_rtp(ulong4); short4 __ovld __cnfn convert_short4_sat_rtp(ulong4); short4 __ovld __cnfn convert_short4_rtn(ulong4); short4 __ovld __cnfn convert_short4_sat_rtn(ulong4); short4 __ovld __cnfn convert_short4(ulong4); short4 __ovld __cnfn convert_short4_sat(ulong4); short4 __ovld __cnfn convert_short4_rte(float4); short4 __ovld __cnfn convert_short4_sat_rte(float4); short4 __ovld __cnfn convert_short4_rtz(float4); short4 __ovld __cnfn convert_short4_sat_rtz(float4); short4 __ovld __cnfn convert_short4_rtp(float4); short4 __ovld __cnfn convert_short4_sat_rtp(float4); short4 __ovld __cnfn convert_short4_rtn(float4); short4 __ovld __cnfn convert_short4_sat_rtn(float4); short4 __ovld __cnfn convert_short4(float4); short4 __ovld __cnfn convert_short4_sat(float4); ushort4 __ovld __cnfn convert_ushort4_rte(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4); ushort4 __ovld __cnfn convert_ushort4_rtz(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4); ushort4 __ovld __cnfn convert_ushort4_rtp(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4); ushort4 __ovld __cnfn convert_ushort4_rtn(char4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4); ushort4 __ovld __cnfn convert_ushort4(char4); ushort4 __ovld __cnfn convert_ushort4_sat(char4); ushort4 __ovld __cnfn convert_ushort4_rte(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4); ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4); ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4); ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4); ushort4 __ovld __cnfn convert_ushort4(uchar4); ushort4 __ovld __cnfn convert_ushort4_sat(uchar4); ushort4 __ovld __cnfn convert_ushort4_rte(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4); ushort4 __ovld __cnfn convert_ushort4_rtz(short4); ushort4 __ovld __cnfn 
convert_ushort4_sat_rtz(short4); ushort4 __ovld __cnfn convert_ushort4_rtp(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4); ushort4 __ovld __cnfn convert_ushort4_rtn(short4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4); ushort4 __ovld __cnfn convert_ushort4(short4); ushort4 __ovld __cnfn convert_ushort4_sat(short4); ushort4 __ovld __cnfn convert_ushort4_rte(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4); ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4); ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4); ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4); ushort4 __ovld __cnfn convert_ushort4(ushort4); ushort4 __ovld __cnfn convert_ushort4_sat(ushort4); ushort4 __ovld __cnfn convert_ushort4_rte(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4); ushort4 __ovld __cnfn convert_ushort4_rtz(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4); ushort4 __ovld __cnfn convert_ushort4_rtp(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4); ushort4 __ovld __cnfn convert_ushort4_rtn(int4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4); ushort4 __ovld __cnfn convert_ushort4(int4); ushort4 __ovld __cnfn convert_ushort4_sat(int4); ushort4 __ovld __cnfn convert_ushort4_rte(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4); ushort4 __ovld __cnfn convert_ushort4_rtz(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4); ushort4 __ovld __cnfn convert_ushort4_rtp(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4); ushort4 __ovld __cnfn convert_ushort4_rtn(uint4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4); ushort4 __ovld __cnfn convert_ushort4(uint4); ushort4 __ovld __cnfn convert_ushort4_sat(uint4); ushort4 __ovld __cnfn convert_ushort4_rte(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4); 
ushort4 __ovld __cnfn convert_ushort4_rtz(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4); ushort4 __ovld __cnfn convert_ushort4_rtp(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4); ushort4 __ovld __cnfn convert_ushort4_rtn(long4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4); ushort4 __ovld __cnfn convert_ushort4(long4); ushort4 __ovld __cnfn convert_ushort4_sat(long4); ushort4 __ovld __cnfn convert_ushort4_rte(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4); ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4); ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4); ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4); ushort4 __ovld __cnfn convert_ushort4(ulong4); ushort4 __ovld __cnfn convert_ushort4_sat(ulong4); ushort4 __ovld __cnfn convert_ushort4_rte(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4); ushort4 __ovld __cnfn convert_ushort4_rtz(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4); ushort4 __ovld __cnfn convert_ushort4_rtp(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4); ushort4 __ovld __cnfn convert_ushort4_rtn(float4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4); ushort4 __ovld __cnfn convert_ushort4(float4); ushort4 __ovld __cnfn convert_ushort4_sat(float4); int4 __ovld __cnfn convert_int4_rte(char4); int4 __ovld __cnfn convert_int4_sat_rte(char4); int4 __ovld __cnfn convert_int4_rtz(char4); int4 __ovld __cnfn convert_int4_sat_rtz(char4); int4 __ovld __cnfn convert_int4_rtp(char4); int4 __ovld __cnfn convert_int4_sat_rtp(char4); int4 __ovld __cnfn convert_int4_rtn(char4); int4 __ovld __cnfn convert_int4_sat_rtn(char4); int4 __ovld __cnfn convert_int4(char4); int4 __ovld __cnfn convert_int4_sat(char4); int4 __ovld __cnfn convert_int4_rte(uchar4); int4 __ovld __cnfn convert_int4_sat_rte(uchar4); 
int4 __ovld __cnfn convert_int4_rtz(uchar4); int4 __ovld __cnfn convert_int4_sat_rtz(uchar4); int4 __ovld __cnfn convert_int4_rtp(uchar4); int4 __ovld __cnfn convert_int4_sat_rtp(uchar4); int4 __ovld __cnfn convert_int4_rtn(uchar4); int4 __ovld __cnfn convert_int4_sat_rtn(uchar4); int4 __ovld __cnfn convert_int4(uchar4); int4 __ovld __cnfn convert_int4_sat(uchar4); int4 __ovld __cnfn convert_int4_rte(short4); int4 __ovld __cnfn convert_int4_sat_rte(short4); int4 __ovld __cnfn convert_int4_rtz(short4); int4 __ovld __cnfn convert_int4_sat_rtz(short4); int4 __ovld __cnfn convert_int4_rtp(short4); int4 __ovld __cnfn convert_int4_sat_rtp(short4); int4 __ovld __cnfn convert_int4_rtn(short4); int4 __ovld __cnfn convert_int4_sat_rtn(short4); int4 __ovld __cnfn convert_int4(short4); int4 __ovld __cnfn convert_int4_sat(short4); int4 __ovld __cnfn convert_int4_rte(ushort4); int4 __ovld __cnfn convert_int4_sat_rte(ushort4); int4 __ovld __cnfn convert_int4_rtz(ushort4); int4 __ovld __cnfn convert_int4_sat_rtz(ushort4); int4 __ovld __cnfn convert_int4_rtp(ushort4); int4 __ovld __cnfn convert_int4_sat_rtp(ushort4); int4 __ovld __cnfn convert_int4_rtn(ushort4); int4 __ovld __cnfn convert_int4_sat_rtn(ushort4); int4 __ovld __cnfn convert_int4(ushort4); int4 __ovld __cnfn convert_int4_sat(ushort4); int4 __ovld __cnfn convert_int4_rte(int4); int4 __ovld __cnfn convert_int4_sat_rte(int4); int4 __ovld __cnfn convert_int4_rtz(int4); int4 __ovld __cnfn convert_int4_sat_rtz(int4); int4 __ovld __cnfn convert_int4_rtp(int4); int4 __ovld __cnfn convert_int4_sat_rtp(int4); int4 __ovld __cnfn convert_int4_rtn(int4); int4 __ovld __cnfn convert_int4_sat_rtn(int4); int4 __ovld __cnfn convert_int4(int4); int4 __ovld __cnfn convert_int4_sat(int4); int4 __ovld __cnfn convert_int4_rte(uint4); int4 __ovld __cnfn convert_int4_sat_rte(uint4); int4 __ovld __cnfn convert_int4_rtz(uint4); int4 __ovld __cnfn convert_int4_sat_rtz(uint4); int4 __ovld __cnfn convert_int4_rtp(uint4); int4 __ovld __cnfn 
convert_int4_sat_rtp(uint4); int4 __ovld __cnfn convert_int4_rtn(uint4); int4 __ovld __cnfn convert_int4_sat_rtn(uint4); int4 __ovld __cnfn convert_int4(uint4); int4 __ovld __cnfn convert_int4_sat(uint4); int4 __ovld __cnfn convert_int4_rte(long4); int4 __ovld __cnfn convert_int4_sat_rte(long4); int4 __ovld __cnfn convert_int4_rtz(long4); int4 __ovld __cnfn convert_int4_sat_rtz(long4); int4 __ovld __cnfn convert_int4_rtp(long4); int4 __ovld __cnfn convert_int4_sat_rtp(long4); int4 __ovld __cnfn convert_int4_rtn(long4); int4 __ovld __cnfn convert_int4_sat_rtn(long4); int4 __ovld __cnfn convert_int4(long4); int4 __ovld __cnfn convert_int4_sat(long4); int4 __ovld __cnfn convert_int4_rte(ulong4); int4 __ovld __cnfn convert_int4_sat_rte(ulong4); int4 __ovld __cnfn convert_int4_rtz(ulong4); int4 __ovld __cnfn convert_int4_sat_rtz(ulong4); int4 __ovld __cnfn convert_int4_rtp(ulong4); int4 __ovld __cnfn convert_int4_sat_rtp(ulong4); int4 __ovld __cnfn convert_int4_rtn(ulong4); int4 __ovld __cnfn convert_int4_sat_rtn(ulong4); int4 __ovld __cnfn convert_int4(ulong4); int4 __ovld __cnfn convert_int4_sat(ulong4); int4 __ovld __cnfn convert_int4_rte(float4); int4 __ovld __cnfn convert_int4_sat_rte(float4); int4 __ovld __cnfn convert_int4_rtz(float4); int4 __ovld __cnfn convert_int4_sat_rtz(float4); int4 __ovld __cnfn convert_int4_rtp(float4); int4 __ovld __cnfn convert_int4_sat_rtp(float4); int4 __ovld __cnfn convert_int4_rtn(float4); int4 __ovld __cnfn convert_int4_sat_rtn(float4); int4 __ovld __cnfn convert_int4(float4); int4 __ovld __cnfn convert_int4_sat(float4); uint4 __ovld __cnfn convert_uint4_rte(char4); uint4 __ovld __cnfn convert_uint4_sat_rte(char4); uint4 __ovld __cnfn convert_uint4_rtz(char4); uint4 __ovld __cnfn convert_uint4_sat_rtz(char4); uint4 __ovld __cnfn convert_uint4_rtp(char4); uint4 __ovld __cnfn convert_uint4_sat_rtp(char4); uint4 __ovld __cnfn convert_uint4_rtn(char4); uint4 __ovld __cnfn convert_uint4_sat_rtn(char4); uint4 __ovld __cnfn 
convert_uint4(char4); uint4 __ovld __cnfn convert_uint4_sat(char4); uint4 __ovld __cnfn convert_uint4_rte(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4); uint4 __ovld __cnfn convert_uint4_rtz(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4); uint4 __ovld __cnfn convert_uint4_rtp(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4); uint4 __ovld __cnfn convert_uint4_rtn(uchar4); uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4); uint4 __ovld __cnfn convert_uint4(uchar4); uint4 __ovld __cnfn convert_uint4_sat(uchar4); uint4 __ovld __cnfn convert_uint4_rte(short4); uint4 __ovld __cnfn convert_uint4_sat_rte(short4); uint4 __ovld __cnfn convert_uint4_rtz(short4); uint4 __ovld __cnfn convert_uint4_sat_rtz(short4); uint4 __ovld __cnfn convert_uint4_rtp(short4); uint4 __ovld __cnfn convert_uint4_sat_rtp(short4); uint4 __ovld __cnfn convert_uint4_rtn(short4); uint4 __ovld __cnfn convert_uint4_sat_rtn(short4); uint4 __ovld __cnfn convert_uint4(short4); uint4 __ovld __cnfn convert_uint4_sat(short4); uint4 __ovld __cnfn convert_uint4_rte(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4); uint4 __ovld __cnfn convert_uint4_rtz(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4); uint4 __ovld __cnfn convert_uint4_rtp(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4); uint4 __ovld __cnfn convert_uint4_rtn(ushort4); uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4); uint4 __ovld __cnfn convert_uint4(ushort4); uint4 __ovld __cnfn convert_uint4_sat(ushort4); uint4 __ovld __cnfn convert_uint4_rte(int4); uint4 __ovld __cnfn convert_uint4_sat_rte(int4); uint4 __ovld __cnfn convert_uint4_rtz(int4); uint4 __ovld __cnfn convert_uint4_sat_rtz(int4); uint4 __ovld __cnfn convert_uint4_rtp(int4); uint4 __ovld __cnfn convert_uint4_sat_rtp(int4); uint4 __ovld __cnfn convert_uint4_rtn(int4); uint4 __ovld __cnfn convert_uint4_sat_rtn(int4); uint4 __ovld __cnfn convert_uint4(int4); uint4 __ovld __cnfn convert_uint4_sat(int4); uint4 __ovld 
__cnfn convert_uint4_rte(uint4); uint4 __ovld __cnfn convert_uint4_sat_rte(uint4); uint4 __ovld __cnfn convert_uint4_rtz(uint4); uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4); uint4 __ovld __cnfn convert_uint4_rtp(uint4); uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4); uint4 __ovld __cnfn convert_uint4_rtn(uint4); uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4); uint4 __ovld __cnfn convert_uint4(uint4); uint4 __ovld __cnfn convert_uint4_sat(uint4); uint4 __ovld __cnfn convert_uint4_rte(long4); uint4 __ovld __cnfn convert_uint4_sat_rte(long4); uint4 __ovld __cnfn convert_uint4_rtz(long4); uint4 __ovld __cnfn convert_uint4_sat_rtz(long4); uint4 __ovld __cnfn convert_uint4_rtp(long4); uint4 __ovld __cnfn convert_uint4_sat_rtp(long4); uint4 __ovld __cnfn convert_uint4_rtn(long4); uint4 __ovld __cnfn convert_uint4_sat_rtn(long4); uint4 __ovld __cnfn convert_uint4(long4); uint4 __ovld __cnfn convert_uint4_sat(long4); uint4 __ovld __cnfn convert_uint4_rte(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4); uint4 __ovld __cnfn convert_uint4_rtz(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4); uint4 __ovld __cnfn convert_uint4_rtp(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4); uint4 __ovld __cnfn convert_uint4_rtn(ulong4); uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4); uint4 __ovld __cnfn convert_uint4(ulong4); uint4 __ovld __cnfn convert_uint4_sat(ulong4); uint4 __ovld __cnfn convert_uint4_rte(float4); uint4 __ovld __cnfn convert_uint4_sat_rte(float4); uint4 __ovld __cnfn convert_uint4_rtz(float4); uint4 __ovld __cnfn convert_uint4_sat_rtz(float4); uint4 __ovld __cnfn convert_uint4_rtp(float4); uint4 __ovld __cnfn convert_uint4_sat_rtp(float4); uint4 __ovld __cnfn convert_uint4_rtn(float4); uint4 __ovld __cnfn convert_uint4_sat_rtn(float4); uint4 __ovld __cnfn convert_uint4(float4); uint4 __ovld __cnfn convert_uint4_sat(float4); long4 __ovld __cnfn convert_long4_rte(char4); long4 __ovld __cnfn convert_long4_sat_rte(char4); long4 
__ovld __cnfn convert_long4_rtz(char4); long4 __ovld __cnfn convert_long4_sat_rtz(char4); long4 __ovld __cnfn convert_long4_rtp(char4); long4 __ovld __cnfn convert_long4_sat_rtp(char4); long4 __ovld __cnfn convert_long4_rtn(char4); long4 __ovld __cnfn convert_long4_sat_rtn(char4); long4 __ovld __cnfn convert_long4(char4); long4 __ovld __cnfn convert_long4_sat(char4); long4 __ovld __cnfn convert_long4_rte(uchar4); long4 __ovld __cnfn convert_long4_sat_rte(uchar4); long4 __ovld __cnfn convert_long4_rtz(uchar4); long4 __ovld __cnfn convert_long4_sat_rtz(uchar4); long4 __ovld __cnfn convert_long4_rtp(uchar4); long4 __ovld __cnfn convert_long4_sat_rtp(uchar4); long4 __ovld __cnfn convert_long4_rtn(uchar4); long4 __ovld __cnfn convert_long4_sat_rtn(uchar4); long4 __ovld __cnfn convert_long4(uchar4); long4 __ovld __cnfn convert_long4_sat(uchar4); long4 __ovld __cnfn convert_long4_rte(short4); long4 __ovld __cnfn convert_long4_sat_rte(short4); long4 __ovld __cnfn convert_long4_rtz(short4); long4 __ovld __cnfn convert_long4_sat_rtz(short4); long4 __ovld __cnfn convert_long4_rtp(short4); long4 __ovld __cnfn convert_long4_sat_rtp(short4); long4 __ovld __cnfn convert_long4_rtn(short4); long4 __ovld __cnfn convert_long4_sat_rtn(short4); long4 __ovld __cnfn convert_long4(short4); long4 __ovld __cnfn convert_long4_sat(short4); long4 __ovld __cnfn convert_long4_rte(ushort4); long4 __ovld __cnfn convert_long4_sat_rte(ushort4); long4 __ovld __cnfn convert_long4_rtz(ushort4); long4 __ovld __cnfn convert_long4_sat_rtz(ushort4); long4 __ovld __cnfn convert_long4_rtp(ushort4); long4 __ovld __cnfn convert_long4_sat_rtp(ushort4); long4 __ovld __cnfn convert_long4_rtn(ushort4); long4 __ovld __cnfn convert_long4_sat_rtn(ushort4); long4 __ovld __cnfn convert_long4(ushort4); long4 __ovld __cnfn convert_long4_sat(ushort4); long4 __ovld __cnfn convert_long4_rte(int4); long4 __ovld __cnfn convert_long4_sat_rte(int4); long4 __ovld __cnfn convert_long4_rtz(int4); long4 __ovld __cnfn 
convert_long4_sat_rtz(int4); long4 __ovld __cnfn convert_long4_rtp(int4); long4 __ovld __cnfn convert_long4_sat_rtp(int4); long4 __ovld __cnfn convert_long4_rtn(int4); long4 __ovld __cnfn convert_long4_sat_rtn(int4); long4 __ovld __cnfn convert_long4(int4); long4 __ovld __cnfn convert_long4_sat(int4); long4 __ovld __cnfn convert_long4_rte(uint4); long4 __ovld __cnfn convert_long4_sat_rte(uint4); long4 __ovld __cnfn convert_long4_rtz(uint4); long4 __ovld __cnfn convert_long4_sat_rtz(uint4); long4 __ovld __cnfn convert_long4_rtp(uint4); long4 __ovld __cnfn convert_long4_sat_rtp(uint4); long4 __ovld __cnfn convert_long4_rtn(uint4); long4 __ovld __cnfn convert_long4_sat_rtn(uint4); long4 __ovld __cnfn convert_long4(uint4); long4 __ovld __cnfn convert_long4_sat(uint4); long4 __ovld __cnfn convert_long4_rte(long4); long4 __ovld __cnfn convert_long4_sat_rte(long4); long4 __ovld __cnfn convert_long4_rtz(long4); long4 __ovld __cnfn convert_long4_sat_rtz(long4); long4 __ovld __cnfn convert_long4_rtp(long4); long4 __ovld __cnfn convert_long4_sat_rtp(long4); long4 __ovld __cnfn convert_long4_rtn(long4); long4 __ovld __cnfn convert_long4_sat_rtn(long4); long4 __ovld __cnfn convert_long4(long4); long4 __ovld __cnfn convert_long4_sat(long4); long4 __ovld __cnfn convert_long4_rte(ulong4); long4 __ovld __cnfn convert_long4_sat_rte(ulong4); long4 __ovld __cnfn convert_long4_rtz(ulong4); long4 __ovld __cnfn convert_long4_sat_rtz(ulong4); long4 __ovld __cnfn convert_long4_rtp(ulong4); long4 __ovld __cnfn convert_long4_sat_rtp(ulong4); long4 __ovld __cnfn convert_long4_rtn(ulong4); long4 __ovld __cnfn convert_long4_sat_rtn(ulong4); long4 __ovld __cnfn convert_long4(ulong4); long4 __ovld __cnfn convert_long4_sat(ulong4); long4 __ovld __cnfn convert_long4_rte(float4); long4 __ovld __cnfn convert_long4_sat_rte(float4); long4 __ovld __cnfn convert_long4_rtz(float4); long4 __ovld __cnfn convert_long4_sat_rtz(float4); long4 __ovld __cnfn convert_long4_rtp(float4); long4 __ovld __cnfn 
convert_long4_sat_rtp(float4); long4 __ovld __cnfn convert_long4_rtn(float4); long4 __ovld __cnfn convert_long4_sat_rtn(float4); long4 __ovld __cnfn convert_long4(float4); long4 __ovld __cnfn convert_long4_sat(float4); ulong4 __ovld __cnfn convert_ulong4_rte(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4); ulong4 __ovld __cnfn convert_ulong4_rtz(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4); ulong4 __ovld __cnfn convert_ulong4_rtp(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4); ulong4 __ovld __cnfn convert_ulong4_rtn(char4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4); ulong4 __ovld __cnfn convert_ulong4(char4); ulong4 __ovld __cnfn convert_ulong4_sat(char4); ulong4 __ovld __cnfn convert_ulong4_rte(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4); ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4); ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4); ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4); ulong4 __ovld __cnfn convert_ulong4(uchar4); ulong4 __ovld __cnfn convert_ulong4_sat(uchar4); ulong4 __ovld __cnfn convert_ulong4_rte(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4); ulong4 __ovld __cnfn convert_ulong4_rtz(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4); ulong4 __ovld __cnfn convert_ulong4_rtp(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4); ulong4 __ovld __cnfn convert_ulong4_rtn(short4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4); ulong4 __ovld __cnfn convert_ulong4(short4); ulong4 __ovld __cnfn convert_ulong4_sat(short4); ulong4 __ovld __cnfn convert_ulong4_rte(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4); ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4); ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4); ulong4 __ovld __cnfn 
convert_ulong4_sat_rtp(ushort4); ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4); ulong4 __ovld __cnfn convert_ulong4(ushort4); ulong4 __ovld __cnfn convert_ulong4_sat(ushort4); ulong4 __ovld __cnfn convert_ulong4_rte(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4); ulong4 __ovld __cnfn convert_ulong4_rtz(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4); ulong4 __ovld __cnfn convert_ulong4_rtp(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4); ulong4 __ovld __cnfn convert_ulong4_rtn(int4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4); ulong4 __ovld __cnfn convert_ulong4(int4); ulong4 __ovld __cnfn convert_ulong4_sat(int4); ulong4 __ovld __cnfn convert_ulong4_rte(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4); ulong4 __ovld __cnfn convert_ulong4_rtz(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4); ulong4 __ovld __cnfn convert_ulong4_rtp(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4); ulong4 __ovld __cnfn convert_ulong4_rtn(uint4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4); ulong4 __ovld __cnfn convert_ulong4(uint4); ulong4 __ovld __cnfn convert_ulong4_sat(uint4); ulong4 __ovld __cnfn convert_ulong4_rte(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4); ulong4 __ovld __cnfn convert_ulong4_rtz(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4); ulong4 __ovld __cnfn convert_ulong4_rtp(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4); ulong4 __ovld __cnfn convert_ulong4_rtn(long4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4); ulong4 __ovld __cnfn convert_ulong4(long4); ulong4 __ovld __cnfn convert_ulong4_sat(long4); ulong4 __ovld __cnfn convert_ulong4_rte(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4); ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4); ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4); ulong4 __ovld __cnfn 
convert_ulong4_sat_rtp(ulong4); ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4); ulong4 __ovld __cnfn convert_ulong4(ulong4); ulong4 __ovld __cnfn convert_ulong4_sat(ulong4); ulong4 __ovld __cnfn convert_ulong4_rte(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4); ulong4 __ovld __cnfn convert_ulong4_rtz(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4); ulong4 __ovld __cnfn convert_ulong4_rtp(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4); ulong4 __ovld __cnfn convert_ulong4_rtn(float4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4); ulong4 __ovld __cnfn convert_ulong4(float4); ulong4 __ovld __cnfn convert_ulong4_sat(float4); float4 __ovld __cnfn convert_float4_rte(char4); float4 __ovld __cnfn convert_float4_rtz(char4); float4 __ovld __cnfn convert_float4_rtp(char4); float4 __ovld __cnfn convert_float4_rtn(char4); float4 __ovld __cnfn convert_float4(char4); float4 __ovld __cnfn convert_float4_rte(uchar4); float4 __ovld __cnfn convert_float4_rtz(uchar4); float4 __ovld __cnfn convert_float4_rtp(uchar4); float4 __ovld __cnfn convert_float4_rtn(uchar4); float4 __ovld __cnfn convert_float4(uchar4); float4 __ovld __cnfn convert_float4_rte(short4); float4 __ovld __cnfn convert_float4_rtz(short4); float4 __ovld __cnfn convert_float4_rtp(short4); float4 __ovld __cnfn convert_float4_rtn(short4); float4 __ovld __cnfn convert_float4(short4); float4 __ovld __cnfn convert_float4_rte(ushort4); float4 __ovld __cnfn convert_float4_rtz(ushort4); float4 __ovld __cnfn convert_float4_rtp(ushort4); float4 __ovld __cnfn convert_float4_rtn(ushort4); float4 __ovld __cnfn convert_float4(ushort4); float4 __ovld __cnfn convert_float4_rte(int4); float4 __ovld __cnfn convert_float4_rtz(int4); float4 __ovld __cnfn convert_float4_rtp(int4); float4 __ovld __cnfn convert_float4_rtn(int4); float4 __ovld __cnfn convert_float4(int4); float4 __ovld __cnfn convert_float4_rte(uint4); float4 __ovld __cnfn 
convert_float4_rtz(uint4); float4 __ovld __cnfn convert_float4_rtp(uint4); float4 __ovld __cnfn convert_float4_rtn(uint4); float4 __ovld __cnfn convert_float4(uint4); float4 __ovld __cnfn convert_float4_rte(long4); float4 __ovld __cnfn convert_float4_rtz(long4); float4 __ovld __cnfn convert_float4_rtp(long4); float4 __ovld __cnfn convert_float4_rtn(long4); float4 __ovld __cnfn convert_float4(long4); float4 __ovld __cnfn convert_float4_rte(ulong4); float4 __ovld __cnfn convert_float4_rtz(ulong4); float4 __ovld __cnfn convert_float4_rtp(ulong4); float4 __ovld __cnfn convert_float4_rtn(ulong4); float4 __ovld __cnfn convert_float4(ulong4); float4 __ovld __cnfn convert_float4_rte(float4); float4 __ovld __cnfn convert_float4_rtz(float4); float4 __ovld __cnfn convert_float4_rtp(float4); float4 __ovld __cnfn convert_float4_rtn(float4); float4 __ovld __cnfn convert_float4(float4); char8 __ovld __cnfn convert_char8_rte(char8); char8 __ovld __cnfn convert_char8_sat_rte(char8); char8 __ovld __cnfn convert_char8_rtz(char8); char8 __ovld __cnfn convert_char8_sat_rtz(char8); char8 __ovld __cnfn convert_char8_rtp(char8); char8 __ovld __cnfn convert_char8_sat_rtp(char8); char8 __ovld __cnfn convert_char8_rtn(char8); char8 __ovld __cnfn convert_char8_sat_rtn(char8); char8 __ovld __cnfn convert_char8(char8); char8 __ovld __cnfn convert_char8_sat(char8); char8 __ovld __cnfn convert_char8_rte(uchar8); char8 __ovld __cnfn convert_char8_sat_rte(uchar8); char8 __ovld __cnfn convert_char8_rtz(uchar8); char8 __ovld __cnfn convert_char8_sat_rtz(uchar8); char8 __ovld __cnfn convert_char8_rtp(uchar8); char8 __ovld __cnfn convert_char8_sat_rtp(uchar8); char8 __ovld __cnfn convert_char8_rtn(uchar8); char8 __ovld __cnfn convert_char8_sat_rtn(uchar8); char8 __ovld __cnfn convert_char8(uchar8); char8 __ovld __cnfn convert_char8_sat(uchar8); char8 __ovld __cnfn convert_char8_rte(short8); char8 __ovld __cnfn convert_char8_sat_rte(short8); char8 __ovld __cnfn convert_char8_rtz(short8); char8 __ovld 
__cnfn convert_char8_sat_rtz(short8); char8 __ovld __cnfn convert_char8_rtp(short8); char8 __ovld __cnfn convert_char8_sat_rtp(short8); char8 __ovld __cnfn convert_char8_rtn(short8); char8 __ovld __cnfn convert_char8_sat_rtn(short8); char8 __ovld __cnfn convert_char8(short8); char8 __ovld __cnfn convert_char8_sat(short8); char8 __ovld __cnfn convert_char8_rte(ushort8); char8 __ovld __cnfn convert_char8_sat_rte(ushort8); char8 __ovld __cnfn convert_char8_rtz(ushort8); char8 __ovld __cnfn convert_char8_sat_rtz(ushort8); char8 __ovld __cnfn convert_char8_rtp(ushort8); char8 __ovld __cnfn convert_char8_sat_rtp(ushort8); char8 __ovld __cnfn convert_char8_rtn(ushort8); char8 __ovld __cnfn convert_char8_sat_rtn(ushort8); char8 __ovld __cnfn convert_char8(ushort8); char8 __ovld __cnfn convert_char8_sat(ushort8); char8 __ovld __cnfn convert_char8_rte(int8); char8 __ovld __cnfn convert_char8_sat_rte(int8); char8 __ovld __cnfn convert_char8_rtz(int8); char8 __ovld __cnfn convert_char8_sat_rtz(int8); char8 __ovld __cnfn convert_char8_rtp(int8); char8 __ovld __cnfn convert_char8_sat_rtp(int8); char8 __ovld __cnfn convert_char8_rtn(int8); char8 __ovld __cnfn convert_char8_sat_rtn(int8); char8 __ovld __cnfn convert_char8(int8); char8 __ovld __cnfn convert_char8_sat(int8); char8 __ovld __cnfn convert_char8_rte(uint8); char8 __ovld __cnfn convert_char8_sat_rte(uint8); char8 __ovld __cnfn convert_char8_rtz(uint8); char8 __ovld __cnfn convert_char8_sat_rtz(uint8); char8 __ovld __cnfn convert_char8_rtp(uint8); char8 __ovld __cnfn convert_char8_sat_rtp(uint8); char8 __ovld __cnfn convert_char8_rtn(uint8); char8 __ovld __cnfn convert_char8_sat_rtn(uint8); char8 __ovld __cnfn convert_char8(uint8); char8 __ovld __cnfn convert_char8_sat(uint8); char8 __ovld __cnfn convert_char8_rte(long8); char8 __ovld __cnfn convert_char8_sat_rte(long8); char8 __ovld __cnfn convert_char8_rtz(long8); char8 __ovld __cnfn convert_char8_sat_rtz(long8); char8 __ovld __cnfn convert_char8_rtp(long8); char8 
__ovld __cnfn convert_char8_sat_rtp(long8); char8 __ovld __cnfn convert_char8_rtn(long8); char8 __ovld __cnfn convert_char8_sat_rtn(long8); char8 __ovld __cnfn convert_char8(long8); char8 __ovld __cnfn convert_char8_sat(long8); char8 __ovld __cnfn convert_char8_rte(ulong8); char8 __ovld __cnfn convert_char8_sat_rte(ulong8); char8 __ovld __cnfn convert_char8_rtz(ulong8); char8 __ovld __cnfn convert_char8_sat_rtz(ulong8); char8 __ovld __cnfn convert_char8_rtp(ulong8); char8 __ovld __cnfn convert_char8_sat_rtp(ulong8); char8 __ovld __cnfn convert_char8_rtn(ulong8); char8 __ovld __cnfn convert_char8_sat_rtn(ulong8); char8 __ovld __cnfn convert_char8(ulong8); char8 __ovld __cnfn convert_char8_sat(ulong8); char8 __ovld __cnfn convert_char8_rte(float8); char8 __ovld __cnfn convert_char8_sat_rte(float8); char8 __ovld __cnfn convert_char8_rtz(float8); char8 __ovld __cnfn convert_char8_sat_rtz(float8); char8 __ovld __cnfn convert_char8_rtp(float8); char8 __ovld __cnfn convert_char8_sat_rtp(float8); char8 __ovld __cnfn convert_char8_rtn(float8); char8 __ovld __cnfn convert_char8_sat_rtn(float8); char8 __ovld __cnfn convert_char8(float8); char8 __ovld __cnfn convert_char8_sat(float8); uchar8 __ovld __cnfn convert_uchar8_rte(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8); uchar8 __ovld __cnfn convert_uchar8_rtz(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8); uchar8 __ovld __cnfn convert_uchar8_rtp(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8); uchar8 __ovld __cnfn convert_uchar8_rtn(char8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8); uchar8 __ovld __cnfn convert_uchar8(char8); uchar8 __ovld __cnfn convert_uchar8_sat(char8); uchar8 __ovld __cnfn convert_uchar8_rte(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8); uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8); uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8); uchar8 
__ovld __cnfn convert_uchar8_rtn(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8); uchar8 __ovld __cnfn convert_uchar8(uchar8); uchar8 __ovld __cnfn convert_uchar8_sat(uchar8); uchar8 __ovld __cnfn convert_uchar8_rte(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8); uchar8 __ovld __cnfn convert_uchar8_rtz(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8); uchar8 __ovld __cnfn convert_uchar8_rtp(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8); uchar8 __ovld __cnfn convert_uchar8_rtn(short8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8); uchar8 __ovld __cnfn convert_uchar8(short8); uchar8 __ovld __cnfn convert_uchar8_sat(short8); uchar8 __ovld __cnfn convert_uchar8_rte(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8); uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8); uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8); uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8); uchar8 __ovld __cnfn convert_uchar8(ushort8); uchar8 __ovld __cnfn convert_uchar8_sat(ushort8); uchar8 __ovld __cnfn convert_uchar8_rte(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8); uchar8 __ovld __cnfn convert_uchar8_rtz(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8); uchar8 __ovld __cnfn convert_uchar8_rtp(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8); uchar8 __ovld __cnfn convert_uchar8_rtn(int8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8); uchar8 __ovld __cnfn convert_uchar8(int8); uchar8 __ovld __cnfn convert_uchar8_sat(int8); uchar8 __ovld __cnfn convert_uchar8_rte(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8); uchar8 __ovld __cnfn convert_uchar8_rtz(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8); uchar8 __ovld __cnfn convert_uchar8_rtp(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8); uchar8 __ovld 
__cnfn convert_uchar8_rtn(uint8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8); uchar8 __ovld __cnfn convert_uchar8(uint8); uchar8 __ovld __cnfn convert_uchar8_sat(uint8); uchar8 __ovld __cnfn convert_uchar8_rte(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8); uchar8 __ovld __cnfn convert_uchar8_rtz(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8); uchar8 __ovld __cnfn convert_uchar8_rtp(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8); uchar8 __ovld __cnfn convert_uchar8_rtn(long8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8); uchar8 __ovld __cnfn convert_uchar8(long8); uchar8 __ovld __cnfn convert_uchar8_sat(long8); uchar8 __ovld __cnfn convert_uchar8_rte(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8); uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8); uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8); uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8); uchar8 __ovld __cnfn convert_uchar8(ulong8); uchar8 __ovld __cnfn convert_uchar8_sat(ulong8); uchar8 __ovld __cnfn convert_uchar8_rte(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8); uchar8 __ovld __cnfn convert_uchar8_rtz(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8); uchar8 __ovld __cnfn convert_uchar8_rtp(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8); uchar8 __ovld __cnfn convert_uchar8_rtn(float8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8); uchar8 __ovld __cnfn convert_uchar8(float8); uchar8 __ovld __cnfn convert_uchar8_sat(float8); short8 __ovld __cnfn convert_short8_rte(char8); short8 __ovld __cnfn convert_short8_sat_rte(char8); short8 __ovld __cnfn convert_short8_rtz(char8); short8 __ovld __cnfn convert_short8_sat_rtz(char8); short8 __ovld __cnfn convert_short8_rtp(char8); short8 __ovld __cnfn convert_short8_sat_rtp(char8); short8 __ovld __cnfn 
convert_short8_rtn(char8); short8 __ovld __cnfn convert_short8_sat_rtn(char8); short8 __ovld __cnfn convert_short8(char8); short8 __ovld __cnfn convert_short8_sat(char8); short8 __ovld __cnfn convert_short8_rte(uchar8); short8 __ovld __cnfn convert_short8_sat_rte(uchar8); short8 __ovld __cnfn convert_short8_rtz(uchar8); short8 __ovld __cnfn convert_short8_sat_rtz(uchar8); short8 __ovld __cnfn convert_short8_rtp(uchar8); short8 __ovld __cnfn convert_short8_sat_rtp(uchar8); short8 __ovld __cnfn convert_short8_rtn(uchar8); short8 __ovld __cnfn convert_short8_sat_rtn(uchar8); short8 __ovld __cnfn convert_short8(uchar8); short8 __ovld __cnfn convert_short8_sat(uchar8); short8 __ovld __cnfn convert_short8_rte(short8); short8 __ovld __cnfn convert_short8_sat_rte(short8); short8 __ovld __cnfn convert_short8_rtz(short8); short8 __ovld __cnfn convert_short8_sat_rtz(short8); short8 __ovld __cnfn convert_short8_rtp(short8); short8 __ovld __cnfn convert_short8_sat_rtp(short8); short8 __ovld __cnfn convert_short8_rtn(short8); short8 __ovld __cnfn convert_short8_sat_rtn(short8); short8 __ovld __cnfn convert_short8(short8); short8 __ovld __cnfn convert_short8_sat(short8); short8 __ovld __cnfn convert_short8_rte(ushort8); short8 __ovld __cnfn convert_short8_sat_rte(ushort8); short8 __ovld __cnfn convert_short8_rtz(ushort8); short8 __ovld __cnfn convert_short8_sat_rtz(ushort8); short8 __ovld __cnfn convert_short8_rtp(ushort8); short8 __ovld __cnfn convert_short8_sat_rtp(ushort8); short8 __ovld __cnfn convert_short8_rtn(ushort8); short8 __ovld __cnfn convert_short8_sat_rtn(ushort8); short8 __ovld __cnfn convert_short8(ushort8); short8 __ovld __cnfn convert_short8_sat(ushort8); short8 __ovld __cnfn convert_short8_rte(int8); short8 __ovld __cnfn convert_short8_sat_rte(int8); short8 __ovld __cnfn convert_short8_rtz(int8); short8 __ovld __cnfn convert_short8_sat_rtz(int8); short8 __ovld __cnfn convert_short8_rtp(int8); short8 __ovld __cnfn convert_short8_sat_rtp(int8); short8 __ovld 
__cnfn convert_short8_rtn(int8); short8 __ovld __cnfn convert_short8_sat_rtn(int8); short8 __ovld __cnfn convert_short8(int8); short8 __ovld __cnfn convert_short8_sat(int8); short8 __ovld __cnfn convert_short8_rte(uint8); short8 __ovld __cnfn convert_short8_sat_rte(uint8); short8 __ovld __cnfn convert_short8_rtz(uint8); short8 __ovld __cnfn convert_short8_sat_rtz(uint8); short8 __ovld __cnfn convert_short8_rtp(uint8); short8 __ovld __cnfn convert_short8_sat_rtp(uint8); short8 __ovld __cnfn convert_short8_rtn(uint8); short8 __ovld __cnfn convert_short8_sat_rtn(uint8); short8 __ovld __cnfn convert_short8(uint8); short8 __ovld __cnfn convert_short8_sat(uint8); short8 __ovld __cnfn convert_short8_rte(long8); short8 __ovld __cnfn convert_short8_sat_rte(long8); short8 __ovld __cnfn convert_short8_rtz(long8); short8 __ovld __cnfn convert_short8_sat_rtz(long8); short8 __ovld __cnfn convert_short8_rtp(long8); short8 __ovld __cnfn convert_short8_sat_rtp(long8); short8 __ovld __cnfn convert_short8_rtn(long8); short8 __ovld __cnfn convert_short8_sat_rtn(long8); short8 __ovld __cnfn convert_short8(long8); short8 __ovld __cnfn convert_short8_sat(long8); short8 __ovld __cnfn convert_short8_rte(ulong8); short8 __ovld __cnfn convert_short8_sat_rte(ulong8); short8 __ovld __cnfn convert_short8_rtz(ulong8); short8 __ovld __cnfn convert_short8_sat_rtz(ulong8); short8 __ovld __cnfn convert_short8_rtp(ulong8); short8 __ovld __cnfn convert_short8_sat_rtp(ulong8); short8 __ovld __cnfn convert_short8_rtn(ulong8); short8 __ovld __cnfn convert_short8_sat_rtn(ulong8); short8 __ovld __cnfn convert_short8(ulong8); short8 __ovld __cnfn convert_short8_sat(ulong8); short8 __ovld __cnfn convert_short8_rte(float8); short8 __ovld __cnfn convert_short8_sat_rte(float8); short8 __ovld __cnfn convert_short8_rtz(float8); short8 __ovld __cnfn convert_short8_sat_rtz(float8); short8 __ovld __cnfn convert_short8_rtp(float8); short8 __ovld __cnfn convert_short8_sat_rtp(float8); short8 __ovld __cnfn 
convert_short8_rtn(float8); short8 __ovld __cnfn convert_short8_sat_rtn(float8); short8 __ovld __cnfn convert_short8(float8); short8 __ovld __cnfn convert_short8_sat(float8); ushort8 __ovld __cnfn convert_ushort8_rte(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8); ushort8 __ovld __cnfn convert_ushort8_rtz(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8); ushort8 __ovld __cnfn convert_ushort8_rtp(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8); ushort8 __ovld __cnfn convert_ushort8_rtn(char8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8); ushort8 __ovld __cnfn convert_ushort8(char8); ushort8 __ovld __cnfn convert_ushort8_sat(char8); ushort8 __ovld __cnfn convert_ushort8_rte(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8); ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8); ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8); ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8); ushort8 __ovld __cnfn convert_ushort8(uchar8); ushort8 __ovld __cnfn convert_ushort8_sat(uchar8); ushort8 __ovld __cnfn convert_ushort8_rte(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8); ushort8 __ovld __cnfn convert_ushort8_rtz(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8); ushort8 __ovld __cnfn convert_ushort8_rtp(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8); ushort8 __ovld __cnfn convert_ushort8_rtn(short8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8); ushort8 __ovld __cnfn convert_ushort8(short8); ushort8 __ovld __cnfn convert_ushort8_sat(short8); ushort8 __ovld __cnfn convert_ushort8_rte(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8); ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8); ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8); 
ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8); ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8); ushort8 __ovld __cnfn convert_ushort8(ushort8); ushort8 __ovld __cnfn convert_ushort8_sat(ushort8); ushort8 __ovld __cnfn convert_ushort8_rte(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8); ushort8 __ovld __cnfn convert_ushort8_rtz(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8); ushort8 __ovld __cnfn convert_ushort8_rtp(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8); ushort8 __ovld __cnfn convert_ushort8_rtn(int8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8); ushort8 __ovld __cnfn convert_ushort8(int8); ushort8 __ovld __cnfn convert_ushort8_sat(int8); ushort8 __ovld __cnfn convert_ushort8_rte(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8); ushort8 __ovld __cnfn convert_ushort8_rtz(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8); ushort8 __ovld __cnfn convert_ushort8_rtp(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8); ushort8 __ovld __cnfn convert_ushort8_rtn(uint8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8); ushort8 __ovld __cnfn convert_ushort8(uint8); ushort8 __ovld __cnfn convert_ushort8_sat(uint8); ushort8 __ovld __cnfn convert_ushort8_rte(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8); ushort8 __ovld __cnfn convert_ushort8_rtz(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8); ushort8 __ovld __cnfn convert_ushort8_rtp(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8); ushort8 __ovld __cnfn convert_ushort8_rtn(long8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8); ushort8 __ovld __cnfn convert_ushort8(long8); ushort8 __ovld __cnfn convert_ushort8_sat(long8); ushort8 __ovld __cnfn convert_ushort8_rte(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8); ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8); ushort8 __ovld __cnfn 
convert_ushort8_sat_rtz(ulong8); ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8); ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8); ushort8 __ovld __cnfn convert_ushort8(ulong8); ushort8 __ovld __cnfn convert_ushort8_sat(ulong8); ushort8 __ovld __cnfn convert_ushort8_rte(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8); ushort8 __ovld __cnfn convert_ushort8_rtz(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8); ushort8 __ovld __cnfn convert_ushort8_rtp(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8); ushort8 __ovld __cnfn convert_ushort8_rtn(float8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8); ushort8 __ovld __cnfn convert_ushort8(float8); ushort8 __ovld __cnfn convert_ushort8_sat(float8); int8 __ovld __cnfn convert_int8_rte(char8); int8 __ovld __cnfn convert_int8_sat_rte(char8); int8 __ovld __cnfn convert_int8_rtz(char8); int8 __ovld __cnfn convert_int8_sat_rtz(char8); int8 __ovld __cnfn convert_int8_rtp(char8); int8 __ovld __cnfn convert_int8_sat_rtp(char8); int8 __ovld __cnfn convert_int8_rtn(char8); int8 __ovld __cnfn convert_int8_sat_rtn(char8); int8 __ovld __cnfn convert_int8(char8); int8 __ovld __cnfn convert_int8_sat(char8); int8 __ovld __cnfn convert_int8_rte(uchar8); int8 __ovld __cnfn convert_int8_sat_rte(uchar8); int8 __ovld __cnfn convert_int8_rtz(uchar8); int8 __ovld __cnfn convert_int8_sat_rtz(uchar8); int8 __ovld __cnfn convert_int8_rtp(uchar8); int8 __ovld __cnfn convert_int8_sat_rtp(uchar8); int8 __ovld __cnfn convert_int8_rtn(uchar8); int8 __ovld __cnfn convert_int8_sat_rtn(uchar8); int8 __ovld __cnfn convert_int8(uchar8); int8 __ovld __cnfn convert_int8_sat(uchar8); int8 __ovld __cnfn convert_int8_rte(short8); int8 __ovld __cnfn convert_int8_sat_rte(short8); int8 __ovld __cnfn convert_int8_rtz(short8); int8 __ovld __cnfn convert_int8_sat_rtz(short8); int8 __ovld __cnfn 
convert_int8_rtp(short8); int8 __ovld __cnfn convert_int8_sat_rtp(short8); int8 __ovld __cnfn convert_int8_rtn(short8); int8 __ovld __cnfn convert_int8_sat_rtn(short8); int8 __ovld __cnfn convert_int8(short8); int8 __ovld __cnfn convert_int8_sat(short8); int8 __ovld __cnfn convert_int8_rte(ushort8); int8 __ovld __cnfn convert_int8_sat_rte(ushort8); int8 __ovld __cnfn convert_int8_rtz(ushort8); int8 __ovld __cnfn convert_int8_sat_rtz(ushort8); int8 __ovld __cnfn convert_int8_rtp(ushort8); int8 __ovld __cnfn convert_int8_sat_rtp(ushort8); int8 __ovld __cnfn convert_int8_rtn(ushort8); int8 __ovld __cnfn convert_int8_sat_rtn(ushort8); int8 __ovld __cnfn convert_int8(ushort8); int8 __ovld __cnfn convert_int8_sat(ushort8); int8 __ovld __cnfn convert_int8_rte(int8); int8 __ovld __cnfn convert_int8_sat_rte(int8); int8 __ovld __cnfn convert_int8_rtz(int8); int8 __ovld __cnfn convert_int8_sat_rtz(int8); int8 __ovld __cnfn convert_int8_rtp(int8); int8 __ovld __cnfn convert_int8_sat_rtp(int8); int8 __ovld __cnfn convert_int8_rtn(int8); int8 __ovld __cnfn convert_int8_sat_rtn(int8); int8 __ovld __cnfn convert_int8(int8); int8 __ovld __cnfn convert_int8_sat(int8); int8 __ovld __cnfn convert_int8_rte(uint8); int8 __ovld __cnfn convert_int8_sat_rte(uint8); int8 __ovld __cnfn convert_int8_rtz(uint8); int8 __ovld __cnfn convert_int8_sat_rtz(uint8); int8 __ovld __cnfn convert_int8_rtp(uint8); int8 __ovld __cnfn convert_int8_sat_rtp(uint8); int8 __ovld __cnfn convert_int8_rtn(uint8); int8 __ovld __cnfn convert_int8_sat_rtn(uint8); int8 __ovld __cnfn convert_int8(uint8); int8 __ovld __cnfn convert_int8_sat(uint8); int8 __ovld __cnfn convert_int8_rte(long8); int8 __ovld __cnfn convert_int8_sat_rte(long8); int8 __ovld __cnfn convert_int8_rtz(long8); int8 __ovld __cnfn convert_int8_sat_rtz(long8); int8 __ovld __cnfn convert_int8_rtp(long8); int8 __ovld __cnfn convert_int8_sat_rtp(long8); int8 __ovld __cnfn convert_int8_rtn(long8); int8 __ovld __cnfn convert_int8_sat_rtn(long8); int8 
__ovld __cnfn convert_int8(long8); int8 __ovld __cnfn convert_int8_sat(long8); int8 __ovld __cnfn convert_int8_rte(ulong8); int8 __ovld __cnfn convert_int8_sat_rte(ulong8); int8 __ovld __cnfn convert_int8_rtz(ulong8); int8 __ovld __cnfn convert_int8_sat_rtz(ulong8); int8 __ovld __cnfn convert_int8_rtp(ulong8); int8 __ovld __cnfn convert_int8_sat_rtp(ulong8); int8 __ovld __cnfn convert_int8_rtn(ulong8); int8 __ovld __cnfn convert_int8_sat_rtn(ulong8); int8 __ovld __cnfn convert_int8(ulong8); int8 __ovld __cnfn convert_int8_sat(ulong8); int8 __ovld __cnfn convert_int8_rte(float8); int8 __ovld __cnfn convert_int8_sat_rte(float8); int8 __ovld __cnfn convert_int8_rtz(float8); int8 __ovld __cnfn convert_int8_sat_rtz(float8); int8 __ovld __cnfn convert_int8_rtp(float8); int8 __ovld __cnfn convert_int8_sat_rtp(float8); int8 __ovld __cnfn convert_int8_rtn(float8); int8 __ovld __cnfn convert_int8_sat_rtn(float8); int8 __ovld __cnfn convert_int8(float8); int8 __ovld __cnfn convert_int8_sat(float8); uint8 __ovld __cnfn convert_uint8_rte(char8); uint8 __ovld __cnfn convert_uint8_sat_rte(char8); uint8 __ovld __cnfn convert_uint8_rtz(char8); uint8 __ovld __cnfn convert_uint8_sat_rtz(char8); uint8 __ovld __cnfn convert_uint8_rtp(char8); uint8 __ovld __cnfn convert_uint8_sat_rtp(char8); uint8 __ovld __cnfn convert_uint8_rtn(char8); uint8 __ovld __cnfn convert_uint8_sat_rtn(char8); uint8 __ovld __cnfn convert_uint8(char8); uint8 __ovld __cnfn convert_uint8_sat(char8); uint8 __ovld __cnfn convert_uint8_rte(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8); uint8 __ovld __cnfn convert_uint8_rtz(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8); uint8 __ovld __cnfn convert_uint8_rtp(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8); uint8 __ovld __cnfn convert_uint8_rtn(uchar8); uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8); uint8 __ovld __cnfn convert_uint8(uchar8); uint8 __ovld __cnfn convert_uint8_sat(uchar8); uint8 __ovld __cnfn 
convert_uint8_rte(short8); uint8 __ovld __cnfn convert_uint8_sat_rte(short8); uint8 __ovld __cnfn convert_uint8_rtz(short8); uint8 __ovld __cnfn convert_uint8_sat_rtz(short8); uint8 __ovld __cnfn convert_uint8_rtp(short8); uint8 __ovld __cnfn convert_uint8_sat_rtp(short8); uint8 __ovld __cnfn convert_uint8_rtn(short8); uint8 __ovld __cnfn convert_uint8_sat_rtn(short8); uint8 __ovld __cnfn convert_uint8(short8); uint8 __ovld __cnfn convert_uint8_sat(short8); uint8 __ovld __cnfn convert_uint8_rte(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8); uint8 __ovld __cnfn convert_uint8_rtz(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8); uint8 __ovld __cnfn convert_uint8_rtp(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8); uint8 __ovld __cnfn convert_uint8_rtn(ushort8); uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8); uint8 __ovld __cnfn convert_uint8(ushort8); uint8 __ovld __cnfn convert_uint8_sat(ushort8); uint8 __ovld __cnfn convert_uint8_rte(int8); uint8 __ovld __cnfn convert_uint8_sat_rte(int8); uint8 __ovld __cnfn convert_uint8_rtz(int8); uint8 __ovld __cnfn convert_uint8_sat_rtz(int8); uint8 __ovld __cnfn convert_uint8_rtp(int8); uint8 __ovld __cnfn convert_uint8_sat_rtp(int8); uint8 __ovld __cnfn convert_uint8_rtn(int8); uint8 __ovld __cnfn convert_uint8_sat_rtn(int8); uint8 __ovld __cnfn convert_uint8(int8); uint8 __ovld __cnfn convert_uint8_sat(int8); uint8 __ovld __cnfn convert_uint8_rte(uint8); uint8 __ovld __cnfn convert_uint8_sat_rte(uint8); uint8 __ovld __cnfn convert_uint8_rtz(uint8); uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8); uint8 __ovld __cnfn convert_uint8_rtp(uint8); uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8); uint8 __ovld __cnfn convert_uint8_rtn(uint8); uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8); uint8 __ovld __cnfn convert_uint8(uint8); uint8 __ovld __cnfn convert_uint8_sat(uint8); uint8 __ovld __cnfn convert_uint8_rte(long8); uint8 __ovld __cnfn convert_uint8_sat_rte(long8); uint8 __ovld 
__cnfn convert_uint8_rtz(long8); uint8 __ovld __cnfn convert_uint8_sat_rtz(long8); uint8 __ovld __cnfn convert_uint8_rtp(long8); uint8 __ovld __cnfn convert_uint8_sat_rtp(long8); uint8 __ovld __cnfn convert_uint8_rtn(long8); uint8 __ovld __cnfn convert_uint8_sat_rtn(long8); uint8 __ovld __cnfn convert_uint8(long8); uint8 __ovld __cnfn convert_uint8_sat(long8); uint8 __ovld __cnfn convert_uint8_rte(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8); uint8 __ovld __cnfn convert_uint8_rtz(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8); uint8 __ovld __cnfn convert_uint8_rtp(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8); uint8 __ovld __cnfn convert_uint8_rtn(ulong8); uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8); uint8 __ovld __cnfn convert_uint8(ulong8); uint8 __ovld __cnfn convert_uint8_sat(ulong8); uint8 __ovld __cnfn convert_uint8_rte(float8); uint8 __ovld __cnfn convert_uint8_sat_rte(float8); uint8 __ovld __cnfn convert_uint8_rtz(float8); uint8 __ovld __cnfn convert_uint8_sat_rtz(float8); uint8 __ovld __cnfn convert_uint8_rtp(float8); uint8 __ovld __cnfn convert_uint8_sat_rtp(float8); uint8 __ovld __cnfn convert_uint8_rtn(float8); uint8 __ovld __cnfn convert_uint8_sat_rtn(float8); uint8 __ovld __cnfn convert_uint8(float8); uint8 __ovld __cnfn convert_uint8_sat(float8); long8 __ovld __cnfn convert_long8_rte(char8); long8 __ovld __cnfn convert_long8_sat_rte(char8); long8 __ovld __cnfn convert_long8_rtz(char8); long8 __ovld __cnfn convert_long8_sat_rtz(char8); long8 __ovld __cnfn convert_long8_rtp(char8); long8 __ovld __cnfn convert_long8_sat_rtp(char8); long8 __ovld __cnfn convert_long8_rtn(char8); long8 __ovld __cnfn convert_long8_sat_rtn(char8); long8 __ovld __cnfn convert_long8(char8); long8 __ovld __cnfn convert_long8_sat(char8); long8 __ovld __cnfn convert_long8_rte(uchar8); long8 __ovld __cnfn convert_long8_sat_rte(uchar8); long8 __ovld __cnfn convert_long8_rtz(uchar8); long8 __ovld __cnfn convert_long8_sat_rtz(uchar8); 
long8 __ovld __cnfn convert_long8_rtp(uchar8); long8 __ovld __cnfn convert_long8_sat_rtp(uchar8); long8 __ovld __cnfn convert_long8_rtn(uchar8); long8 __ovld __cnfn convert_long8_sat_rtn(uchar8); long8 __ovld __cnfn convert_long8(uchar8); long8 __ovld __cnfn convert_long8_sat(uchar8); long8 __ovld __cnfn convert_long8_rte(short8); long8 __ovld __cnfn convert_long8_sat_rte(short8); long8 __ovld __cnfn convert_long8_rtz(short8); long8 __ovld __cnfn convert_long8_sat_rtz(short8); long8 __ovld __cnfn convert_long8_rtp(short8); long8 __ovld __cnfn convert_long8_sat_rtp(short8); long8 __ovld __cnfn convert_long8_rtn(short8); long8 __ovld __cnfn convert_long8_sat_rtn(short8); long8 __ovld __cnfn convert_long8(short8); long8 __ovld __cnfn convert_long8_sat(short8); long8 __ovld __cnfn convert_long8_rte(ushort8); long8 __ovld __cnfn convert_long8_sat_rte(ushort8); long8 __ovld __cnfn convert_long8_rtz(ushort8); long8 __ovld __cnfn convert_long8_sat_rtz(ushort8); long8 __ovld __cnfn convert_long8_rtp(ushort8); long8 __ovld __cnfn convert_long8_sat_rtp(ushort8); long8 __ovld __cnfn convert_long8_rtn(ushort8); long8 __ovld __cnfn convert_long8_sat_rtn(ushort8); long8 __ovld __cnfn convert_long8(ushort8); long8 __ovld __cnfn convert_long8_sat(ushort8); long8 __ovld __cnfn convert_long8_rte(int8); long8 __ovld __cnfn convert_long8_sat_rte(int8); long8 __ovld __cnfn convert_long8_rtz(int8); long8 __ovld __cnfn convert_long8_sat_rtz(int8); long8 __ovld __cnfn convert_long8_rtp(int8); long8 __ovld __cnfn convert_long8_sat_rtp(int8); long8 __ovld __cnfn convert_long8_rtn(int8); long8 __ovld __cnfn convert_long8_sat_rtn(int8); long8 __ovld __cnfn convert_long8(int8); long8 __ovld __cnfn convert_long8_sat(int8); long8 __ovld __cnfn convert_long8_rte(uint8); long8 __ovld __cnfn convert_long8_sat_rte(uint8); long8 __ovld __cnfn convert_long8_rtz(uint8); long8 __ovld __cnfn convert_long8_sat_rtz(uint8); long8 __ovld __cnfn convert_long8_rtp(uint8); long8 __ovld __cnfn 
convert_long8_sat_rtp(uint8); long8 __ovld __cnfn convert_long8_rtn(uint8); long8 __ovld __cnfn convert_long8_sat_rtn(uint8); long8 __ovld __cnfn convert_long8(uint8); long8 __ovld __cnfn convert_long8_sat(uint8); long8 __ovld __cnfn convert_long8_rte(long8); long8 __ovld __cnfn convert_long8_sat_rte(long8); long8 __ovld __cnfn convert_long8_rtz(long8); long8 __ovld __cnfn convert_long8_sat_rtz(long8); long8 __ovld __cnfn convert_long8_rtp(long8); long8 __ovld __cnfn convert_long8_sat_rtp(long8); long8 __ovld __cnfn convert_long8_rtn(long8); long8 __ovld __cnfn convert_long8_sat_rtn(long8); long8 __ovld __cnfn convert_long8(long8); long8 __ovld __cnfn convert_long8_sat(long8); long8 __ovld __cnfn convert_long8_rte(ulong8); long8 __ovld __cnfn convert_long8_sat_rte(ulong8); long8 __ovld __cnfn convert_long8_rtz(ulong8); long8 __ovld __cnfn convert_long8_sat_rtz(ulong8); long8 __ovld __cnfn convert_long8_rtp(ulong8); long8 __ovld __cnfn convert_long8_sat_rtp(ulong8); long8 __ovld __cnfn convert_long8_rtn(ulong8); long8 __ovld __cnfn convert_long8_sat_rtn(ulong8); long8 __ovld __cnfn convert_long8(ulong8); long8 __ovld __cnfn convert_long8_sat(ulong8); long8 __ovld __cnfn convert_long8_rte(float8); long8 __ovld __cnfn convert_long8_sat_rte(float8); long8 __ovld __cnfn convert_long8_rtz(float8); long8 __ovld __cnfn convert_long8_sat_rtz(float8); long8 __ovld __cnfn convert_long8_rtp(float8); long8 __ovld __cnfn convert_long8_sat_rtp(float8); long8 __ovld __cnfn convert_long8_rtn(float8); long8 __ovld __cnfn convert_long8_sat_rtn(float8); long8 __ovld __cnfn convert_long8(float8); long8 __ovld __cnfn convert_long8_sat(float8); ulong8 __ovld __cnfn convert_ulong8_rte(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8); ulong8 __ovld __cnfn convert_ulong8_rtz(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8); ulong8 __ovld __cnfn convert_ulong8_rtp(char8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8); ulong8 __ovld __cnfn convert_ulong8_rtn(char8); 
ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8); ulong8 __ovld __cnfn convert_ulong8(char8); ulong8 __ovld __cnfn convert_ulong8_sat(char8); ulong8 __ovld __cnfn convert_ulong8_rte(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8); ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8); ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8); ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8); ulong8 __ovld __cnfn convert_ulong8(uchar8); ulong8 __ovld __cnfn convert_ulong8_sat(uchar8); ulong8 __ovld __cnfn convert_ulong8_rte(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8); ulong8 __ovld __cnfn convert_ulong8_rtz(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8); ulong8 __ovld __cnfn convert_ulong8_rtp(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8); ulong8 __ovld __cnfn convert_ulong8_rtn(short8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8); ulong8 __ovld __cnfn convert_ulong8(short8); ulong8 __ovld __cnfn convert_ulong8_sat(short8); ulong8 __ovld __cnfn convert_ulong8_rte(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8); ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8); ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8); ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8); ulong8 __ovld __cnfn convert_ulong8(ushort8); ulong8 __ovld __cnfn convert_ulong8_sat(ushort8); ulong8 __ovld __cnfn convert_ulong8_rte(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8); ulong8 __ovld __cnfn convert_ulong8_rtz(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8); ulong8 __ovld __cnfn convert_ulong8_rtp(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8); ulong8 __ovld __cnfn 
convert_ulong8_rtn(int8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8); ulong8 __ovld __cnfn convert_ulong8(int8); ulong8 __ovld __cnfn convert_ulong8_sat(int8); ulong8 __ovld __cnfn convert_ulong8_rte(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8); ulong8 __ovld __cnfn convert_ulong8_rtz(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8); ulong8 __ovld __cnfn convert_ulong8_rtp(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8); ulong8 __ovld __cnfn convert_ulong8_rtn(uint8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8); ulong8 __ovld __cnfn convert_ulong8(uint8); ulong8 __ovld __cnfn convert_ulong8_sat(uint8); ulong8 __ovld __cnfn convert_ulong8_rte(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8); ulong8 __ovld __cnfn convert_ulong8_rtz(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8); ulong8 __ovld __cnfn convert_ulong8_rtp(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8); ulong8 __ovld __cnfn convert_ulong8_rtn(long8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8); ulong8 __ovld __cnfn convert_ulong8(long8); ulong8 __ovld __cnfn convert_ulong8_sat(long8); ulong8 __ovld __cnfn convert_ulong8_rte(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8); ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8); ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8); ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8); ulong8 __ovld __cnfn convert_ulong8(ulong8); ulong8 __ovld __cnfn convert_ulong8_sat(ulong8); ulong8 __ovld __cnfn convert_ulong8_rte(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8); ulong8 __ovld __cnfn convert_ulong8_rtz(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8); ulong8 __ovld __cnfn convert_ulong8_rtp(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8); ulong8 __ovld __cnfn 
convert_ulong8_rtn(float8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8); ulong8 __ovld __cnfn convert_ulong8(float8); ulong8 __ovld __cnfn convert_ulong8_sat(float8); float8 __ovld __cnfn convert_float8_rte(char8); float8 __ovld __cnfn convert_float8_rtz(char8); float8 __ovld __cnfn convert_float8_rtp(char8); float8 __ovld __cnfn convert_float8_rtn(char8); float8 __ovld __cnfn convert_float8(char8); float8 __ovld __cnfn convert_float8_rte(uchar8); float8 __ovld __cnfn convert_float8_rtz(uchar8); float8 __ovld __cnfn convert_float8_rtp(uchar8); float8 __ovld __cnfn convert_float8_rtn(uchar8); float8 __ovld __cnfn convert_float8(uchar8); float8 __ovld __cnfn convert_float8_rte(short8); float8 __ovld __cnfn convert_float8_rtz(short8); float8 __ovld __cnfn convert_float8_rtp(short8); float8 __ovld __cnfn convert_float8_rtn(short8); float8 __ovld __cnfn convert_float8(short8); float8 __ovld __cnfn convert_float8_rte(ushort8); float8 __ovld __cnfn convert_float8_rtz(ushort8); float8 __ovld __cnfn convert_float8_rtp(ushort8); float8 __ovld __cnfn convert_float8_rtn(ushort8); float8 __ovld __cnfn convert_float8(ushort8); float8 __ovld __cnfn convert_float8_rte(int8); float8 __ovld __cnfn convert_float8_rtz(int8); float8 __ovld __cnfn convert_float8_rtp(int8); float8 __ovld __cnfn convert_float8_rtn(int8); float8 __ovld __cnfn convert_float8(int8); float8 __ovld __cnfn convert_float8_rte(uint8); float8 __ovld __cnfn convert_float8_rtz(uint8); float8 __ovld __cnfn convert_float8_rtp(uint8); float8 __ovld __cnfn convert_float8_rtn(uint8); float8 __ovld __cnfn convert_float8(uint8); float8 __ovld __cnfn convert_float8_rte(long8); float8 __ovld __cnfn convert_float8_rtz(long8); float8 __ovld __cnfn convert_float8_rtp(long8); float8 __ovld __cnfn convert_float8_rtn(long8); float8 __ovld __cnfn convert_float8(long8); float8 __ovld __cnfn convert_float8_rte(ulong8); float8 __ovld __cnfn convert_float8_rtz(ulong8); float8 __ovld __cnfn convert_float8_rtp(ulong8); float8 
__ovld __cnfn convert_float8_rtn(ulong8); float8 __ovld __cnfn convert_float8(ulong8); float8 __ovld __cnfn convert_float8_rte(float8); float8 __ovld __cnfn convert_float8_rtz(float8); float8 __ovld __cnfn convert_float8_rtp(float8); float8 __ovld __cnfn convert_float8_rtn(float8); float8 __ovld __cnfn convert_float8(float8); char16 __ovld __cnfn convert_char16_rte(char16); char16 __ovld __cnfn convert_char16_sat_rte(char16); char16 __ovld __cnfn convert_char16_rtz(char16); char16 __ovld __cnfn convert_char16_sat_rtz(char16); char16 __ovld __cnfn convert_char16_rtp(char16); char16 __ovld __cnfn convert_char16_sat_rtp(char16); char16 __ovld __cnfn convert_char16_rtn(char16); char16 __ovld __cnfn convert_char16_sat_rtn(char16); char16 __ovld __cnfn convert_char16(char16); char16 __ovld __cnfn convert_char16_sat(char16); char16 __ovld __cnfn convert_char16_rte(uchar16); char16 __ovld __cnfn convert_char16_sat_rte(uchar16); char16 __ovld __cnfn convert_char16_rtz(uchar16); char16 __ovld __cnfn convert_char16_sat_rtz(uchar16); char16 __ovld __cnfn convert_char16_rtp(uchar16); char16 __ovld __cnfn convert_char16_sat_rtp(uchar16); char16 __ovld __cnfn convert_char16_rtn(uchar16); char16 __ovld __cnfn convert_char16_sat_rtn(uchar16); char16 __ovld __cnfn convert_char16(uchar16); char16 __ovld __cnfn convert_char16_sat(uchar16); char16 __ovld __cnfn convert_char16_rte(short16); char16 __ovld __cnfn convert_char16_sat_rte(short16); char16 __ovld __cnfn convert_char16_rtz(short16); char16 __ovld __cnfn convert_char16_sat_rtz(short16); char16 __ovld __cnfn convert_char16_rtp(short16); char16 __ovld __cnfn convert_char16_sat_rtp(short16); char16 __ovld __cnfn convert_char16_rtn(short16); char16 __ovld __cnfn convert_char16_sat_rtn(short16); char16 __ovld __cnfn convert_char16(short16); char16 __ovld __cnfn convert_char16_sat(short16); char16 __ovld __cnfn convert_char16_rte(ushort16); char16 __ovld __cnfn convert_char16_sat_rte(ushort16); char16 __ovld __cnfn 
convert_char16_rtz(ushort16); char16 __ovld __cnfn convert_char16_sat_rtz(ushort16); char16 __ovld __cnfn convert_char16_rtp(ushort16); char16 __ovld __cnfn convert_char16_sat_rtp(ushort16); char16 __ovld __cnfn convert_char16_rtn(ushort16); char16 __ovld __cnfn convert_char16_sat_rtn(ushort16); char16 __ovld __cnfn convert_char16(ushort16); char16 __ovld __cnfn convert_char16_sat(ushort16); char16 __ovld __cnfn convert_char16_rte(int16); char16 __ovld __cnfn convert_char16_sat_rte(int16); char16 __ovld __cnfn convert_char16_rtz(int16); char16 __ovld __cnfn convert_char16_sat_rtz(int16); char16 __ovld __cnfn convert_char16_rtp(int16); char16 __ovld __cnfn convert_char16_sat_rtp(int16); char16 __ovld __cnfn convert_char16_rtn(int16); char16 __ovld __cnfn convert_char16_sat_rtn(int16); char16 __ovld __cnfn convert_char16(int16); char16 __ovld __cnfn convert_char16_sat(int16); char16 __ovld __cnfn convert_char16_rte(uint16); char16 __ovld __cnfn convert_char16_sat_rte(uint16); char16 __ovld __cnfn convert_char16_rtz(uint16); char16 __ovld __cnfn convert_char16_sat_rtz(uint16); char16 __ovld __cnfn convert_char16_rtp(uint16); char16 __ovld __cnfn convert_char16_sat_rtp(uint16); char16 __ovld __cnfn convert_char16_rtn(uint16); char16 __ovld __cnfn convert_char16_sat_rtn(uint16); char16 __ovld __cnfn convert_char16(uint16); char16 __ovld __cnfn convert_char16_sat(uint16); char16 __ovld __cnfn convert_char16_rte(long16); char16 __ovld __cnfn convert_char16_sat_rte(long16); char16 __ovld __cnfn convert_char16_rtz(long16); char16 __ovld __cnfn convert_char16_sat_rtz(long16); char16 __ovld __cnfn convert_char16_rtp(long16); char16 __ovld __cnfn convert_char16_sat_rtp(long16); char16 __ovld __cnfn convert_char16_rtn(long16); char16 __ovld __cnfn convert_char16_sat_rtn(long16); char16 __ovld __cnfn convert_char16(long16); char16 __ovld __cnfn convert_char16_sat(long16); char16 __ovld __cnfn convert_char16_rte(ulong16); char16 __ovld __cnfn convert_char16_sat_rte(ulong16); 
char16 __ovld __cnfn convert_char16_rtz(ulong16); char16 __ovld __cnfn convert_char16_sat_rtz(ulong16); char16 __ovld __cnfn convert_char16_rtp(ulong16); char16 __ovld __cnfn convert_char16_sat_rtp(ulong16); char16 __ovld __cnfn convert_char16_rtn(ulong16); char16 __ovld __cnfn convert_char16_sat_rtn(ulong16); char16 __ovld __cnfn convert_char16(ulong16); char16 __ovld __cnfn convert_char16_sat(ulong16); char16 __ovld __cnfn convert_char16_rte(float16); char16 __ovld __cnfn convert_char16_sat_rte(float16); char16 __ovld __cnfn convert_char16_rtz(float16); char16 __ovld __cnfn convert_char16_sat_rtz(float16); char16 __ovld __cnfn convert_char16_rtp(float16); char16 __ovld __cnfn convert_char16_sat_rtp(float16); char16 __ovld __cnfn convert_char16_rtn(float16); char16 __ovld __cnfn convert_char16_sat_rtn(float16); char16 __ovld __cnfn convert_char16(float16); char16 __ovld __cnfn convert_char16_sat(float16); uchar16 __ovld __cnfn convert_uchar16_rte(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16); uchar16 __ovld __cnfn convert_uchar16_rtz(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16); uchar16 __ovld __cnfn convert_uchar16_rtp(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16); uchar16 __ovld __cnfn convert_uchar16_rtn(char16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16); uchar16 __ovld __cnfn convert_uchar16(char16); uchar16 __ovld __cnfn convert_uchar16_sat(char16); uchar16 __ovld __cnfn convert_uchar16_rte(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16); uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16); uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16); uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16); uchar16 __ovld __cnfn convert_uchar16(uchar16); uchar16 __ovld __cnfn convert_uchar16_sat(uchar16); uchar16 __ovld __cnfn 
convert_uchar16_rte(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16); uchar16 __ovld __cnfn convert_uchar16_rtz(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16); uchar16 __ovld __cnfn convert_uchar16_rtp(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16); uchar16 __ovld __cnfn convert_uchar16_rtn(short16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16); uchar16 __ovld __cnfn convert_uchar16(short16); uchar16 __ovld __cnfn convert_uchar16_sat(short16); uchar16 __ovld __cnfn convert_uchar16_rte(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16); uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16); uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16); uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16); uchar16 __ovld __cnfn convert_uchar16(ushort16); uchar16 __ovld __cnfn convert_uchar16_sat(ushort16); uchar16 __ovld __cnfn convert_uchar16_rte(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16); uchar16 __ovld __cnfn convert_uchar16_rtz(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16); uchar16 __ovld __cnfn convert_uchar16_rtp(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16); uchar16 __ovld __cnfn convert_uchar16_rtn(int16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16); uchar16 __ovld __cnfn convert_uchar16(int16); uchar16 __ovld __cnfn convert_uchar16_sat(int16); uchar16 __ovld __cnfn convert_uchar16_rte(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16); uchar16 __ovld __cnfn convert_uchar16_rtz(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16); uchar16 __ovld __cnfn convert_uchar16_rtp(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16); uchar16 __ovld __cnfn convert_uchar16_rtn(uint16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16); uchar16 
__ovld __cnfn convert_uchar16(uint16); uchar16 __ovld __cnfn convert_uchar16_sat(uint16); uchar16 __ovld __cnfn convert_uchar16_rte(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16); uchar16 __ovld __cnfn convert_uchar16_rtz(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16); uchar16 __ovld __cnfn convert_uchar16_rtp(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16); uchar16 __ovld __cnfn convert_uchar16_rtn(long16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16); uchar16 __ovld __cnfn convert_uchar16(long16); uchar16 __ovld __cnfn convert_uchar16_sat(long16); uchar16 __ovld __cnfn convert_uchar16_rte(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16); uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16); uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16); uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16); uchar16 __ovld __cnfn convert_uchar16(ulong16); uchar16 __ovld __cnfn convert_uchar16_sat(ulong16); uchar16 __ovld __cnfn convert_uchar16_rte(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16); uchar16 __ovld __cnfn convert_uchar16_rtz(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16); uchar16 __ovld __cnfn convert_uchar16_rtp(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16); uchar16 __ovld __cnfn convert_uchar16_rtn(float16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16); uchar16 __ovld __cnfn convert_uchar16(float16); uchar16 __ovld __cnfn convert_uchar16_sat(float16); short16 __ovld __cnfn convert_short16_rte(char16); short16 __ovld __cnfn convert_short16_sat_rte(char16); short16 __ovld __cnfn convert_short16_rtz(char16); short16 __ovld __cnfn convert_short16_sat_rtz(char16); short16 __ovld __cnfn convert_short16_rtp(char16); short16 __ovld __cnfn convert_short16_sat_rtp(char16); 
short16 __ovld __cnfn convert_short16_rtn(char16); short16 __ovld __cnfn convert_short16_sat_rtn(char16); short16 __ovld __cnfn convert_short16(char16); short16 __ovld __cnfn convert_short16_sat(char16); short16 __ovld __cnfn convert_short16_rte(uchar16); short16 __ovld __cnfn convert_short16_sat_rte(uchar16); short16 __ovld __cnfn convert_short16_rtz(uchar16); short16 __ovld __cnfn convert_short16_sat_rtz(uchar16); short16 __ovld __cnfn convert_short16_rtp(uchar16); short16 __ovld __cnfn convert_short16_sat_rtp(uchar16); short16 __ovld __cnfn convert_short16_rtn(uchar16); short16 __ovld __cnfn convert_short16_sat_rtn(uchar16); short16 __ovld __cnfn convert_short16(uchar16); short16 __ovld __cnfn convert_short16_sat(uchar16); short16 __ovld __cnfn convert_short16_rte(short16); short16 __ovld __cnfn convert_short16_sat_rte(short16); short16 __ovld __cnfn convert_short16_rtz(short16); short16 __ovld __cnfn convert_short16_sat_rtz(short16); short16 __ovld __cnfn convert_short16_rtp(short16); short16 __ovld __cnfn convert_short16_sat_rtp(short16); short16 __ovld __cnfn convert_short16_rtn(short16); short16 __ovld __cnfn convert_short16_sat_rtn(short16); short16 __ovld __cnfn convert_short16(short16); short16 __ovld __cnfn convert_short16_sat(short16); short16 __ovld __cnfn convert_short16_rte(ushort16); short16 __ovld __cnfn convert_short16_sat_rte(ushort16); short16 __ovld __cnfn convert_short16_rtz(ushort16); short16 __ovld __cnfn convert_short16_sat_rtz(ushort16); short16 __ovld __cnfn convert_short16_rtp(ushort16); short16 __ovld __cnfn convert_short16_sat_rtp(ushort16); short16 __ovld __cnfn convert_short16_rtn(ushort16); short16 __ovld __cnfn convert_short16_sat_rtn(ushort16); short16 __ovld __cnfn convert_short16(ushort16); short16 __ovld __cnfn convert_short16_sat(ushort16); short16 __ovld __cnfn convert_short16_rte(int16); short16 __ovld __cnfn convert_short16_sat_rte(int16); short16 __ovld __cnfn convert_short16_rtz(int16); short16 __ovld __cnfn 
convert_short16_sat_rtz(int16); short16 __ovld __cnfn convert_short16_rtp(int16); short16 __ovld __cnfn convert_short16_sat_rtp(int16); short16 __ovld __cnfn convert_short16_rtn(int16); short16 __ovld __cnfn convert_short16_sat_rtn(int16); short16 __ovld __cnfn convert_short16(int16); short16 __ovld __cnfn convert_short16_sat(int16); short16 __ovld __cnfn convert_short16_rte(uint16); short16 __ovld __cnfn convert_short16_sat_rte(uint16); short16 __ovld __cnfn convert_short16_rtz(uint16); short16 __ovld __cnfn convert_short16_sat_rtz(uint16); short16 __ovld __cnfn convert_short16_rtp(uint16); short16 __ovld __cnfn convert_short16_sat_rtp(uint16); short16 __ovld __cnfn convert_short16_rtn(uint16); short16 __ovld __cnfn convert_short16_sat_rtn(uint16); short16 __ovld __cnfn convert_short16(uint16); short16 __ovld __cnfn convert_short16_sat(uint16); short16 __ovld __cnfn convert_short16_rte(long16); short16 __ovld __cnfn convert_short16_sat_rte(long16); short16 __ovld __cnfn convert_short16_rtz(long16); short16 __ovld __cnfn convert_short16_sat_rtz(long16); short16 __ovld __cnfn convert_short16_rtp(long16); short16 __ovld __cnfn convert_short16_sat_rtp(long16); short16 __ovld __cnfn convert_short16_rtn(long16); short16 __ovld __cnfn convert_short16_sat_rtn(long16); short16 __ovld __cnfn convert_short16(long16); short16 __ovld __cnfn convert_short16_sat(long16); short16 __ovld __cnfn convert_short16_rte(ulong16); short16 __ovld __cnfn convert_short16_sat_rte(ulong16); short16 __ovld __cnfn convert_short16_rtz(ulong16); short16 __ovld __cnfn convert_short16_sat_rtz(ulong16); short16 __ovld __cnfn convert_short16_rtp(ulong16); short16 __ovld __cnfn convert_short16_sat_rtp(ulong16); short16 __ovld __cnfn convert_short16_rtn(ulong16); short16 __ovld __cnfn convert_short16_sat_rtn(ulong16); short16 __ovld __cnfn convert_short16(ulong16); short16 __ovld __cnfn convert_short16_sat(ulong16); short16 __ovld __cnfn convert_short16_rte(float16); short16 __ovld __cnfn 
convert_short16_sat_rte(float16); short16 __ovld __cnfn convert_short16_rtz(float16); short16 __ovld __cnfn convert_short16_sat_rtz(float16); short16 __ovld __cnfn convert_short16_rtp(float16); short16 __ovld __cnfn convert_short16_sat_rtp(float16); short16 __ovld __cnfn convert_short16_rtn(float16); short16 __ovld __cnfn convert_short16_sat_rtn(float16); short16 __ovld __cnfn convert_short16(float16); short16 __ovld __cnfn convert_short16_sat(float16); ushort16 __ovld __cnfn convert_ushort16_rte(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16); ushort16 __ovld __cnfn convert_ushort16_rtz(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16); ushort16 __ovld __cnfn convert_ushort16_rtp(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16); ushort16 __ovld __cnfn convert_ushort16_rtn(char16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16); ushort16 __ovld __cnfn convert_ushort16(char16); ushort16 __ovld __cnfn convert_ushort16_sat(char16); ushort16 __ovld __cnfn convert_ushort16_rte(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16); ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16); ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16); ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16); ushort16 __ovld __cnfn convert_ushort16(uchar16); ushort16 __ovld __cnfn convert_ushort16_sat(uchar16); ushort16 __ovld __cnfn convert_ushort16_rte(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16); ushort16 __ovld __cnfn convert_ushort16_rtz(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16); ushort16 __ovld __cnfn convert_ushort16_rtp(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16); ushort16 __ovld __cnfn convert_ushort16_rtn(short16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16); 
ushort16 __ovld __cnfn convert_ushort16(short16); ushort16 __ovld __cnfn convert_ushort16_sat(short16); ushort16 __ovld __cnfn convert_ushort16_rte(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16); ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16); ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16); ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16); ushort16 __ovld __cnfn convert_ushort16(ushort16); ushort16 __ovld __cnfn convert_ushort16_sat(ushort16); ushort16 __ovld __cnfn convert_ushort16_rte(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16); ushort16 __ovld __cnfn convert_ushort16_rtz(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16); ushort16 __ovld __cnfn convert_ushort16_rtp(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16); ushort16 __ovld __cnfn convert_ushort16_rtn(int16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16); ushort16 __ovld __cnfn convert_ushort16(int16); ushort16 __ovld __cnfn convert_ushort16_sat(int16); ushort16 __ovld __cnfn convert_ushort16_rte(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16); ushort16 __ovld __cnfn convert_ushort16_rtz(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16); ushort16 __ovld __cnfn convert_ushort16_rtp(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16); ushort16 __ovld __cnfn convert_ushort16_rtn(uint16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16); ushort16 __ovld __cnfn convert_ushort16(uint16); ushort16 __ovld __cnfn convert_ushort16_sat(uint16); ushort16 __ovld __cnfn convert_ushort16_rte(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16); ushort16 __ovld __cnfn convert_ushort16_rtz(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16); ushort16 __ovld __cnfn 
convert_ushort16_rtp(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16); ushort16 __ovld __cnfn convert_ushort16_rtn(long16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16); ushort16 __ovld __cnfn convert_ushort16(long16); ushort16 __ovld __cnfn convert_ushort16_sat(long16); ushort16 __ovld __cnfn convert_ushort16_rte(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16); ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16); ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16); ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16); ushort16 __ovld __cnfn convert_ushort16(ulong16); ushort16 __ovld __cnfn convert_ushort16_sat(ulong16); ushort16 __ovld __cnfn convert_ushort16_rte(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16); ushort16 __ovld __cnfn convert_ushort16_rtz(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16); ushort16 __ovld __cnfn convert_ushort16_rtp(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16); ushort16 __ovld __cnfn convert_ushort16_rtn(float16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16); ushort16 __ovld __cnfn convert_ushort16(float16); ushort16 __ovld __cnfn convert_ushort16_sat(float16); int16 __ovld __cnfn convert_int16_rte(char16); int16 __ovld __cnfn convert_int16_sat_rte(char16); int16 __ovld __cnfn convert_int16_rtz(char16); int16 __ovld __cnfn convert_int16_sat_rtz(char16); int16 __ovld __cnfn convert_int16_rtp(char16); int16 __ovld __cnfn convert_int16_sat_rtp(char16); int16 __ovld __cnfn convert_int16_rtn(char16); int16 __ovld __cnfn convert_int16_sat_rtn(char16); int16 __ovld __cnfn convert_int16(char16); int16 __ovld __cnfn convert_int16_sat(char16); int16 __ovld __cnfn convert_int16_rte(uchar16); int16 __ovld __cnfn convert_int16_sat_rte(uchar16); int16 __ovld 
__cnfn convert_int16_rtz(uchar16); int16 __ovld __cnfn convert_int16_sat_rtz(uchar16); int16 __ovld __cnfn convert_int16_rtp(uchar16); int16 __ovld __cnfn convert_int16_sat_rtp(uchar16); int16 __ovld __cnfn convert_int16_rtn(uchar16); int16 __ovld __cnfn convert_int16_sat_rtn(uchar16); int16 __ovld __cnfn convert_int16(uchar16); int16 __ovld __cnfn convert_int16_sat(uchar16); int16 __ovld __cnfn convert_int16_rte(short16); int16 __ovld __cnfn convert_int16_sat_rte(short16); int16 __ovld __cnfn convert_int16_rtz(short16); int16 __ovld __cnfn convert_int16_sat_rtz(short16); int16 __ovld __cnfn convert_int16_rtp(short16); int16 __ovld __cnfn convert_int16_sat_rtp(short16); int16 __ovld __cnfn convert_int16_rtn(short16); int16 __ovld __cnfn convert_int16_sat_rtn(short16); int16 __ovld __cnfn convert_int16(short16); int16 __ovld __cnfn convert_int16_sat(short16); int16 __ovld __cnfn convert_int16_rte(ushort16); int16 __ovld __cnfn convert_int16_sat_rte(ushort16); int16 __ovld __cnfn convert_int16_rtz(ushort16); int16 __ovld __cnfn convert_int16_sat_rtz(ushort16); int16 __ovld __cnfn convert_int16_rtp(ushort16); int16 __ovld __cnfn convert_int16_sat_rtp(ushort16); int16 __ovld __cnfn convert_int16_rtn(ushort16); int16 __ovld __cnfn convert_int16_sat_rtn(ushort16); int16 __ovld __cnfn convert_int16(ushort16); int16 __ovld __cnfn convert_int16_sat(ushort16); int16 __ovld __cnfn convert_int16_rte(int16); int16 __ovld __cnfn convert_int16_sat_rte(int16); int16 __ovld __cnfn convert_int16_rtz(int16); int16 __ovld __cnfn convert_int16_sat_rtz(int16); int16 __ovld __cnfn convert_int16_rtp(int16); int16 __ovld __cnfn convert_int16_sat_rtp(int16); int16 __ovld __cnfn convert_int16_rtn(int16); int16 __ovld __cnfn convert_int16_sat_rtn(int16); int16 __ovld __cnfn convert_int16(int16); int16 __ovld __cnfn convert_int16_sat(int16); int16 __ovld __cnfn convert_int16_rte(uint16); int16 __ovld __cnfn convert_int16_sat_rte(uint16); int16 __ovld __cnfn convert_int16_rtz(uint16); int16 
__ovld __cnfn convert_int16_sat_rtz(uint16); int16 __ovld __cnfn convert_int16_rtp(uint16); int16 __ovld __cnfn convert_int16_sat_rtp(uint16); int16 __ovld __cnfn convert_int16_rtn(uint16); int16 __ovld __cnfn convert_int16_sat_rtn(uint16); int16 __ovld __cnfn convert_int16(uint16); int16 __ovld __cnfn convert_int16_sat(uint16); int16 __ovld __cnfn convert_int16_rte(long16); int16 __ovld __cnfn convert_int16_sat_rte(long16); int16 __ovld __cnfn convert_int16_rtz(long16); int16 __ovld __cnfn convert_int16_sat_rtz(long16); int16 __ovld __cnfn convert_int16_rtp(long16); int16 __ovld __cnfn convert_int16_sat_rtp(long16); int16 __ovld __cnfn convert_int16_rtn(long16); int16 __ovld __cnfn convert_int16_sat_rtn(long16); int16 __ovld __cnfn convert_int16(long16); int16 __ovld __cnfn convert_int16_sat(long16); int16 __ovld __cnfn convert_int16_rte(ulong16); int16 __ovld __cnfn convert_int16_sat_rte(ulong16); int16 __ovld __cnfn convert_int16_rtz(ulong16); int16 __ovld __cnfn convert_int16_sat_rtz(ulong16); int16 __ovld __cnfn convert_int16_rtp(ulong16); int16 __ovld __cnfn convert_int16_sat_rtp(ulong16); int16 __ovld __cnfn convert_int16_rtn(ulong16); int16 __ovld __cnfn convert_int16_sat_rtn(ulong16); int16 __ovld __cnfn convert_int16(ulong16); int16 __ovld __cnfn convert_int16_sat(ulong16); int16 __ovld __cnfn convert_int16_rte(float16); int16 __ovld __cnfn convert_int16_sat_rte(float16); int16 __ovld __cnfn convert_int16_rtz(float16); int16 __ovld __cnfn convert_int16_sat_rtz(float16); int16 __ovld __cnfn convert_int16_rtp(float16); int16 __ovld __cnfn convert_int16_sat_rtp(float16); int16 __ovld __cnfn convert_int16_rtn(float16); int16 __ovld __cnfn convert_int16_sat_rtn(float16); int16 __ovld __cnfn convert_int16(float16); int16 __ovld __cnfn convert_int16_sat(float16); uint16 __ovld __cnfn convert_uint16_rte(char16); uint16 __ovld __cnfn convert_uint16_sat_rte(char16); uint16 __ovld __cnfn convert_uint16_rtz(char16); uint16 __ovld __cnfn 
convert_uint16_sat_rtz(char16); uint16 __ovld __cnfn convert_uint16_rtp(char16); uint16 __ovld __cnfn convert_uint16_sat_rtp(char16); uint16 __ovld __cnfn convert_uint16_rtn(char16); uint16 __ovld __cnfn convert_uint16_sat_rtn(char16); uint16 __ovld __cnfn convert_uint16(char16); uint16 __ovld __cnfn convert_uint16_sat(char16); uint16 __ovld __cnfn convert_uint16_rte(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16); uint16 __ovld __cnfn convert_uint16_rtz(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16); uint16 __ovld __cnfn convert_uint16_rtp(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16); uint16 __ovld __cnfn convert_uint16_rtn(uchar16); uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16); uint16 __ovld __cnfn convert_uint16(uchar16); uint16 __ovld __cnfn convert_uint16_sat(uchar16); uint16 __ovld __cnfn convert_uint16_rte(short16); uint16 __ovld __cnfn convert_uint16_sat_rte(short16); uint16 __ovld __cnfn convert_uint16_rtz(short16); uint16 __ovld __cnfn convert_uint16_sat_rtz(short16); uint16 __ovld __cnfn convert_uint16_rtp(short16); uint16 __ovld __cnfn convert_uint16_sat_rtp(short16); uint16 __ovld __cnfn convert_uint16_rtn(short16); uint16 __ovld __cnfn convert_uint16_sat_rtn(short16); uint16 __ovld __cnfn convert_uint16(short16); uint16 __ovld __cnfn convert_uint16_sat(short16); uint16 __ovld __cnfn convert_uint16_rte(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16); uint16 __ovld __cnfn convert_uint16_rtz(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16); uint16 __ovld __cnfn convert_uint16_rtp(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16); uint16 __ovld __cnfn convert_uint16_rtn(ushort16); uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16); uint16 __ovld __cnfn convert_uint16(ushort16); uint16 __ovld __cnfn convert_uint16_sat(ushort16); uint16 __ovld __cnfn convert_uint16_rte(int16); uint16 __ovld __cnfn convert_uint16_sat_rte(int16); uint16 __ovld __cnfn 
convert_uint16_rtz(int16); uint16 __ovld __cnfn convert_uint16_sat_rtz(int16); uint16 __ovld __cnfn convert_uint16_rtp(int16); uint16 __ovld __cnfn convert_uint16_sat_rtp(int16); uint16 __ovld __cnfn convert_uint16_rtn(int16); uint16 __ovld __cnfn convert_uint16_sat_rtn(int16); uint16 __ovld __cnfn convert_uint16(int16); uint16 __ovld __cnfn convert_uint16_sat(int16); uint16 __ovld __cnfn convert_uint16_rte(uint16); uint16 __ovld __cnfn convert_uint16_sat_rte(uint16); uint16 __ovld __cnfn convert_uint16_rtz(uint16); uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16); uint16 __ovld __cnfn convert_uint16_rtp(uint16); uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16); uint16 __ovld __cnfn convert_uint16_rtn(uint16); uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16); uint16 __ovld __cnfn convert_uint16(uint16); uint16 __ovld __cnfn convert_uint16_sat(uint16); uint16 __ovld __cnfn convert_uint16_rte(long16); uint16 __ovld __cnfn convert_uint16_sat_rte(long16); uint16 __ovld __cnfn convert_uint16_rtz(long16); uint16 __ovld __cnfn convert_uint16_sat_rtz(long16); uint16 __ovld __cnfn convert_uint16_rtp(long16); uint16 __ovld __cnfn convert_uint16_sat_rtp(long16); uint16 __ovld __cnfn convert_uint16_rtn(long16); uint16 __ovld __cnfn convert_uint16_sat_rtn(long16); uint16 __ovld __cnfn convert_uint16(long16); uint16 __ovld __cnfn convert_uint16_sat(long16); uint16 __ovld __cnfn convert_uint16_rte(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16); uint16 __ovld __cnfn convert_uint16_rtz(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16); uint16 __ovld __cnfn convert_uint16_rtp(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16); uint16 __ovld __cnfn convert_uint16_rtn(ulong16); uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16); uint16 __ovld __cnfn convert_uint16(ulong16); uint16 __ovld __cnfn convert_uint16_sat(ulong16); uint16 __ovld __cnfn convert_uint16_rte(float16); uint16 __ovld __cnfn convert_uint16_sat_rte(float16); uint16 
__ovld __cnfn convert_uint16_rtz(float16); uint16 __ovld __cnfn convert_uint16_sat_rtz(float16); uint16 __ovld __cnfn convert_uint16_rtp(float16); uint16 __ovld __cnfn convert_uint16_sat_rtp(float16); uint16 __ovld __cnfn convert_uint16_rtn(float16); uint16 __ovld __cnfn convert_uint16_sat_rtn(float16); uint16 __ovld __cnfn convert_uint16(float16); uint16 __ovld __cnfn convert_uint16_sat(float16); long16 __ovld __cnfn convert_long16_rte(char16); long16 __ovld __cnfn convert_long16_sat_rte(char16); long16 __ovld __cnfn convert_long16_rtz(char16); long16 __ovld __cnfn convert_long16_sat_rtz(char16); long16 __ovld __cnfn convert_long16_rtp(char16); long16 __ovld __cnfn convert_long16_sat_rtp(char16); long16 __ovld __cnfn convert_long16_rtn(char16); long16 __ovld __cnfn convert_long16_sat_rtn(char16); long16 __ovld __cnfn convert_long16(char16); long16 __ovld __cnfn convert_long16_sat(char16); long16 __ovld __cnfn convert_long16_rte(uchar16); long16 __ovld __cnfn convert_long16_sat_rte(uchar16); long16 __ovld __cnfn convert_long16_rtz(uchar16); long16 __ovld __cnfn convert_long16_sat_rtz(uchar16); long16 __ovld __cnfn convert_long16_rtp(uchar16); long16 __ovld __cnfn convert_long16_sat_rtp(uchar16); long16 __ovld __cnfn convert_long16_rtn(uchar16); long16 __ovld __cnfn convert_long16_sat_rtn(uchar16); long16 __ovld __cnfn convert_long16(uchar16); long16 __ovld __cnfn convert_long16_sat(uchar16); long16 __ovld __cnfn convert_long16_rte(short16); long16 __ovld __cnfn convert_long16_sat_rte(short16); long16 __ovld __cnfn convert_long16_rtz(short16); long16 __ovld __cnfn convert_long16_sat_rtz(short16); long16 __ovld __cnfn convert_long16_rtp(short16); long16 __ovld __cnfn convert_long16_sat_rtp(short16); long16 __ovld __cnfn convert_long16_rtn(short16); long16 __ovld __cnfn convert_long16_sat_rtn(short16); long16 __ovld __cnfn convert_long16(short16); long16 __ovld __cnfn convert_long16_sat(short16); long16 __ovld __cnfn convert_long16_rte(ushort16); long16 __ovld __cnfn 
convert_long16_sat_rte(ushort16); long16 __ovld __cnfn convert_long16_rtz(ushort16); long16 __ovld __cnfn convert_long16_sat_rtz(ushort16); long16 __ovld __cnfn convert_long16_rtp(ushort16); long16 __ovld __cnfn convert_long16_sat_rtp(ushort16); long16 __ovld __cnfn convert_long16_rtn(ushort16); long16 __ovld __cnfn convert_long16_sat_rtn(ushort16); long16 __ovld __cnfn convert_long16(ushort16); long16 __ovld __cnfn convert_long16_sat(ushort16); long16 __ovld __cnfn convert_long16_rte(int16); long16 __ovld __cnfn convert_long16_sat_rte(int16); long16 __ovld __cnfn convert_long16_rtz(int16); long16 __ovld __cnfn convert_long16_sat_rtz(int16); long16 __ovld __cnfn convert_long16_rtp(int16); long16 __ovld __cnfn convert_long16_sat_rtp(int16); long16 __ovld __cnfn convert_long16_rtn(int16); long16 __ovld __cnfn convert_long16_sat_rtn(int16); long16 __ovld __cnfn convert_long16(int16); long16 __ovld __cnfn convert_long16_sat(int16); long16 __ovld __cnfn convert_long16_rte(uint16); long16 __ovld __cnfn convert_long16_sat_rte(uint16); long16 __ovld __cnfn convert_long16_rtz(uint16); long16 __ovld __cnfn convert_long16_sat_rtz(uint16); long16 __ovld __cnfn convert_long16_rtp(uint16); long16 __ovld __cnfn convert_long16_sat_rtp(uint16); long16 __ovld __cnfn convert_long16_rtn(uint16); long16 __ovld __cnfn convert_long16_sat_rtn(uint16); long16 __ovld __cnfn convert_long16(uint16); long16 __ovld __cnfn convert_long16_sat(uint16); long16 __ovld __cnfn convert_long16_rte(long16); long16 __ovld __cnfn convert_long16_sat_rte(long16); long16 __ovld __cnfn convert_long16_rtz(long16); long16 __ovld __cnfn convert_long16_sat_rtz(long16); long16 __ovld __cnfn convert_long16_rtp(long16); long16 __ovld __cnfn convert_long16_sat_rtp(long16); long16 __ovld __cnfn convert_long16_rtn(long16); long16 __ovld __cnfn convert_long16_sat_rtn(long16); long16 __ovld __cnfn convert_long16(long16); long16 __ovld __cnfn convert_long16_sat(long16); long16 __ovld __cnfn convert_long16_rte(ulong16); 
long16 __ovld __cnfn convert_long16_sat_rte(ulong16); long16 __ovld __cnfn convert_long16_rtz(ulong16); long16 __ovld __cnfn convert_long16_sat_rtz(ulong16); long16 __ovld __cnfn convert_long16_rtp(ulong16); long16 __ovld __cnfn convert_long16_sat_rtp(ulong16); long16 __ovld __cnfn convert_long16_rtn(ulong16); long16 __ovld __cnfn convert_long16_sat_rtn(ulong16); long16 __ovld __cnfn convert_long16(ulong16); long16 __ovld __cnfn convert_long16_sat(ulong16); long16 __ovld __cnfn convert_long16_rte(float16); long16 __ovld __cnfn convert_long16_sat_rte(float16); long16 __ovld __cnfn convert_long16_rtz(float16); long16 __ovld __cnfn convert_long16_sat_rtz(float16); long16 __ovld __cnfn convert_long16_rtp(float16); long16 __ovld __cnfn convert_long16_sat_rtp(float16); long16 __ovld __cnfn convert_long16_rtn(float16); long16 __ovld __cnfn convert_long16_sat_rtn(float16); long16 __ovld __cnfn convert_long16(float16); long16 __ovld __cnfn convert_long16_sat(float16); ulong16 __ovld __cnfn convert_ulong16_rte(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16); ulong16 __ovld __cnfn convert_ulong16_rtz(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16); ulong16 __ovld __cnfn convert_ulong16_rtp(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16); ulong16 __ovld __cnfn convert_ulong16_rtn(char16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16); ulong16 __ovld __cnfn convert_ulong16(char16); ulong16 __ovld __cnfn convert_ulong16_sat(char16); ulong16 __ovld __cnfn convert_ulong16_rte(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16); ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16); ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16); ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16); ulong16 __ovld __cnfn convert_ulong16(uchar16); ulong16 __ovld __cnfn 
convert_ulong16_sat(uchar16); ulong16 __ovld __cnfn convert_ulong16_rte(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16); ulong16 __ovld __cnfn convert_ulong16_rtz(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16); ulong16 __ovld __cnfn convert_ulong16_rtp(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16); ulong16 __ovld __cnfn convert_ulong16_rtn(short16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16); ulong16 __ovld __cnfn convert_ulong16(short16); ulong16 __ovld __cnfn convert_ulong16_sat(short16); ulong16 __ovld __cnfn convert_ulong16_rte(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16); ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16); ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16); ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16); ulong16 __ovld __cnfn convert_ulong16(ushort16); ulong16 __ovld __cnfn convert_ulong16_sat(ushort16); ulong16 __ovld __cnfn convert_ulong16_rte(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16); ulong16 __ovld __cnfn convert_ulong16_rtz(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16); ulong16 __ovld __cnfn convert_ulong16_rtp(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16); ulong16 __ovld __cnfn convert_ulong16_rtn(int16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16); ulong16 __ovld __cnfn convert_ulong16(int16); ulong16 __ovld __cnfn convert_ulong16_sat(int16); ulong16 __ovld __cnfn convert_ulong16_rte(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16); ulong16 __ovld __cnfn convert_ulong16_rtz(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16); ulong16 __ovld __cnfn convert_ulong16_rtp(uint16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16); ulong16 __ovld __cnfn convert_ulong16_rtn(uint16); ulong16 __ovld 
__cnfn convert_ulong16_sat_rtn(uint16); ulong16 __ovld __cnfn convert_ulong16(uint16); ulong16 __ovld __cnfn convert_ulong16_sat(uint16); ulong16 __ovld __cnfn convert_ulong16_rte(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16); ulong16 __ovld __cnfn convert_ulong16_rtz(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16); ulong16 __ovld __cnfn convert_ulong16_rtp(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16); ulong16 __ovld __cnfn convert_ulong16_rtn(long16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16); ulong16 __ovld __cnfn convert_ulong16(long16); ulong16 __ovld __cnfn convert_ulong16_sat(long16); ulong16 __ovld __cnfn convert_ulong16_rte(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16); ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16); ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16); ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16); ulong16 __ovld __cnfn convert_ulong16(ulong16); ulong16 __ovld __cnfn convert_ulong16_sat(ulong16); ulong16 __ovld __cnfn convert_ulong16_rte(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16); ulong16 __ovld __cnfn convert_ulong16_rtz(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16); ulong16 __ovld __cnfn convert_ulong16_rtp(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16); ulong16 __ovld __cnfn convert_ulong16_rtn(float16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16); ulong16 __ovld __cnfn convert_ulong16(float16); ulong16 __ovld __cnfn convert_ulong16_sat(float16); float16 __ovld __cnfn convert_float16_rte(char16); float16 __ovld __cnfn convert_float16_rtz(char16); float16 __ovld __cnfn convert_float16_rtp(char16); float16 __ovld __cnfn convert_float16_rtn(char16); float16 __ovld __cnfn convert_float16(char16); float16 __ovld __cnfn 
convert_float16_rte(uchar16); float16 __ovld __cnfn convert_float16_rtz(uchar16); float16 __ovld __cnfn convert_float16_rtp(uchar16); float16 __ovld __cnfn convert_float16_rtn(uchar16); float16 __ovld __cnfn convert_float16(uchar16); float16 __ovld __cnfn convert_float16_rte(short16); float16 __ovld __cnfn convert_float16_rtz(short16); float16 __ovld __cnfn convert_float16_rtp(short16); float16 __ovld __cnfn convert_float16_rtn(short16); float16 __ovld __cnfn convert_float16(short16); float16 __ovld __cnfn convert_float16_rte(ushort16); float16 __ovld __cnfn convert_float16_rtz(ushort16); float16 __ovld __cnfn convert_float16_rtp(ushort16); float16 __ovld __cnfn convert_float16_rtn(ushort16); float16 __ovld __cnfn convert_float16(ushort16); float16 __ovld __cnfn convert_float16_rte(int16); float16 __ovld __cnfn convert_float16_rtz(int16); float16 __ovld __cnfn convert_float16_rtp(int16); float16 __ovld __cnfn convert_float16_rtn(int16); float16 __ovld __cnfn convert_float16(int16); float16 __ovld __cnfn convert_float16_rte(uint16); float16 __ovld __cnfn convert_float16_rtz(uint16); float16 __ovld __cnfn convert_float16_rtp(uint16); float16 __ovld __cnfn convert_float16_rtn(uint16); float16 __ovld __cnfn convert_float16(uint16); float16 __ovld __cnfn convert_float16_rte(long16); float16 __ovld __cnfn convert_float16_rtz(long16); float16 __ovld __cnfn convert_float16_rtp(long16); float16 __ovld __cnfn convert_float16_rtn(long16); float16 __ovld __cnfn convert_float16(long16); float16 __ovld __cnfn convert_float16_rte(ulong16); float16 __ovld __cnfn convert_float16_rtz(ulong16); float16 __ovld __cnfn convert_float16_rtp(ulong16); float16 __ovld __cnfn convert_float16_rtn(ulong16); float16 __ovld __cnfn convert_float16(ulong16); float16 __ovld __cnfn convert_float16_rte(float16); float16 __ovld __cnfn convert_float16_rtz(float16); float16 __ovld __cnfn convert_float16_rtp(float16); float16 __ovld __cnfn convert_float16_rtn(float16); float16 __ovld __cnfn 
convert_float16(float16); // Conversions with double data type parameters or return value. #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable char __ovld __cnfn convert_char(double); char __ovld __cnfn convert_char_rte(double); char __ovld __cnfn convert_char_rtn(double); char __ovld __cnfn convert_char_rtp(double); char __ovld __cnfn convert_char_rtz(double); char __ovld __cnfn convert_char_sat(double); char __ovld __cnfn convert_char_sat_rte(double); char __ovld __cnfn convert_char_sat_rtn(double); char __ovld __cnfn convert_char_sat_rtp(double); char __ovld __cnfn convert_char_sat_rtz(double); char2 __ovld __cnfn convert_char2(double2); char2 __ovld __cnfn convert_char2_rte(double2); char2 __ovld __cnfn convert_char2_rtn(double2); char2 __ovld __cnfn convert_char2_rtp(double2); char2 __ovld __cnfn convert_char2_rtz(double2); char2 __ovld __cnfn convert_char2_sat(double2); char2 __ovld __cnfn convert_char2_sat_rte(double2); char2 __ovld __cnfn convert_char2_sat_rtn(double2); char2 __ovld __cnfn convert_char2_sat_rtp(double2); char2 __ovld __cnfn convert_char2_sat_rtz(double2); char3 __ovld __cnfn convert_char3(double3); char3 __ovld __cnfn convert_char3_rte(double3); char3 __ovld __cnfn convert_char3_rtn(double3); char3 __ovld __cnfn convert_char3_rtp(double3); char3 __ovld __cnfn convert_char3_rtz(double3); char3 __ovld __cnfn convert_char3_sat(double3); char3 __ovld __cnfn convert_char3_sat_rte(double3); char3 __ovld __cnfn convert_char3_sat_rtn(double3); char3 __ovld __cnfn convert_char3_sat_rtp(double3); char3 __ovld __cnfn convert_char3_sat_rtz(double3); char4 __ovld __cnfn convert_char4(double4); char4 __ovld __cnfn convert_char4_rte(double4); char4 __ovld __cnfn convert_char4_rtn(double4); char4 __ovld __cnfn convert_char4_rtp(double4); char4 __ovld __cnfn convert_char4_rtz(double4); char4 __ovld __cnfn convert_char4_sat(double4); char4 __ovld __cnfn convert_char4_sat_rte(double4); char4 __ovld __cnfn convert_char4_sat_rtn(double4); char4 
__ovld __cnfn convert_char4_sat_rtp(double4); char4 __ovld __cnfn convert_char4_sat_rtz(double4); char8 __ovld __cnfn convert_char8(double8); char8 __ovld __cnfn convert_char8_rte(double8); char8 __ovld __cnfn convert_char8_rtn(double8); char8 __ovld __cnfn convert_char8_rtp(double8); char8 __ovld __cnfn convert_char8_rtz(double8); char8 __ovld __cnfn convert_char8_sat(double8); char8 __ovld __cnfn convert_char8_sat_rte(double8); char8 __ovld __cnfn convert_char8_sat_rtn(double8); char8 __ovld __cnfn convert_char8_sat_rtp(double8); char8 __ovld __cnfn convert_char8_sat_rtz(double8); char16 __ovld __cnfn convert_char16(double16); char16 __ovld __cnfn convert_char16_rte(double16); char16 __ovld __cnfn convert_char16_rtn(double16); char16 __ovld __cnfn convert_char16_rtp(double16); char16 __ovld __cnfn convert_char16_rtz(double16); char16 __ovld __cnfn convert_char16_sat(double16); char16 __ovld __cnfn convert_char16_sat_rte(double16); char16 __ovld __cnfn convert_char16_sat_rtn(double16); char16 __ovld __cnfn convert_char16_sat_rtp(double16); char16 __ovld __cnfn convert_char16_sat_rtz(double16); uchar __ovld __cnfn convert_uchar(double); uchar __ovld __cnfn convert_uchar_rte(double); uchar __ovld __cnfn convert_uchar_rtn(double); uchar __ovld __cnfn convert_uchar_rtp(double); uchar __ovld __cnfn convert_uchar_rtz(double); uchar __ovld __cnfn convert_uchar_sat(double); uchar __ovld __cnfn convert_uchar_sat_rte(double); uchar __ovld __cnfn convert_uchar_sat_rtn(double); uchar __ovld __cnfn convert_uchar_sat_rtp(double); uchar __ovld __cnfn convert_uchar_sat_rtz(double); uchar2 __ovld __cnfn convert_uchar2(double2); uchar2 __ovld __cnfn convert_uchar2_rte(double2); uchar2 __ovld __cnfn convert_uchar2_rtn(double2); uchar2 __ovld __cnfn convert_uchar2_rtp(double2); uchar2 __ovld __cnfn convert_uchar2_rtz(double2); uchar2 __ovld __cnfn convert_uchar2_sat(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2); 
uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2); uchar3 __ovld __cnfn convert_uchar3(double3); uchar3 __ovld __cnfn convert_uchar3_rte(double3); uchar3 __ovld __cnfn convert_uchar3_rtn(double3); uchar3 __ovld __cnfn convert_uchar3_rtp(double3); uchar3 __ovld __cnfn convert_uchar3_rtz(double3); uchar3 __ovld __cnfn convert_uchar3_sat(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3); uchar4 __ovld __cnfn convert_uchar4(double4); uchar4 __ovld __cnfn convert_uchar4_rte(double4); uchar4 __ovld __cnfn convert_uchar4_rtn(double4); uchar4 __ovld __cnfn convert_uchar4_rtp(double4); uchar4 __ovld __cnfn convert_uchar4_rtz(double4); uchar4 __ovld __cnfn convert_uchar4_sat(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4); uchar8 __ovld __cnfn convert_uchar8(double8); uchar8 __ovld __cnfn convert_uchar8_rte(double8); uchar8 __ovld __cnfn convert_uchar8_rtn(double8); uchar8 __ovld __cnfn convert_uchar8_rtp(double8); uchar8 __ovld __cnfn convert_uchar8_rtz(double8); uchar8 __ovld __cnfn convert_uchar8_sat(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8); uchar16 __ovld __cnfn convert_uchar16(double16); uchar16 __ovld __cnfn convert_uchar16_rte(double16); uchar16 __ovld __cnfn convert_uchar16_rtn(double16); uchar16 __ovld __cnfn convert_uchar16_rtp(double16); uchar16 __ovld __cnfn convert_uchar16_rtz(double16); uchar16 __ovld __cnfn convert_uchar16_sat(double16); uchar16 __ovld __cnfn 
convert_uchar16_sat_rte(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16); short __ovld __cnfn convert_short(double); short __ovld __cnfn convert_short_rte(double); short __ovld __cnfn convert_short_rtn(double); short __ovld __cnfn convert_short_rtp(double); short __ovld __cnfn convert_short_rtz(double); short __ovld __cnfn convert_short_sat(double); short __ovld __cnfn convert_short_sat_rte(double); short __ovld __cnfn convert_short_sat_rtn(double); short __ovld __cnfn convert_short_sat_rtp(double); short __ovld __cnfn convert_short_sat_rtz(double); short2 __ovld __cnfn convert_short2(double2); short2 __ovld __cnfn convert_short2_rte(double2); short2 __ovld __cnfn convert_short2_rtn(double2); short2 __ovld __cnfn convert_short2_rtp(double2); short2 __ovld __cnfn convert_short2_rtz(double2); short2 __ovld __cnfn convert_short2_sat(double2); short2 __ovld __cnfn convert_short2_sat_rte(double2); short2 __ovld __cnfn convert_short2_sat_rtn(double2); short2 __ovld __cnfn convert_short2_sat_rtp(double2); short2 __ovld __cnfn convert_short2_sat_rtz(double2); short3 __ovld __cnfn convert_short3(double3); short3 __ovld __cnfn convert_short3_rte(double3); short3 __ovld __cnfn convert_short3_rtn(double3); short3 __ovld __cnfn convert_short3_rtp(double3); short3 __ovld __cnfn convert_short3_rtz(double3); short3 __ovld __cnfn convert_short3_sat(double3); short3 __ovld __cnfn convert_short3_sat_rte(double3); short3 __ovld __cnfn convert_short3_sat_rtn(double3); short3 __ovld __cnfn convert_short3_sat_rtp(double3); short3 __ovld __cnfn convert_short3_sat_rtz(double3); short4 __ovld __cnfn convert_short4(double4); short4 __ovld __cnfn convert_short4_rte(double4); short4 __ovld __cnfn convert_short4_rtn(double4); short4 __ovld __cnfn convert_short4_rtp(double4); short4 __ovld __cnfn convert_short4_rtz(double4); short4 __ovld __cnfn 
convert_short4_sat(double4); short4 __ovld __cnfn convert_short4_sat_rte(double4); short4 __ovld __cnfn convert_short4_sat_rtn(double4); short4 __ovld __cnfn convert_short4_sat_rtp(double4); short4 __ovld __cnfn convert_short4_sat_rtz(double4); short8 __ovld __cnfn convert_short8(double8); short8 __ovld __cnfn convert_short8_rte(double8); short8 __ovld __cnfn convert_short8_rtn(double8); short8 __ovld __cnfn convert_short8_rtp(double8); short8 __ovld __cnfn convert_short8_rtz(double8); short8 __ovld __cnfn convert_short8_sat(double8); short8 __ovld __cnfn convert_short8_sat_rte(double8); short8 __ovld __cnfn convert_short8_sat_rtn(double8); short8 __ovld __cnfn convert_short8_sat_rtp(double8); short8 __ovld __cnfn convert_short8_sat_rtz(double8); short16 __ovld __cnfn convert_short16(double16); short16 __ovld __cnfn convert_short16_rte(double16); short16 __ovld __cnfn convert_short16_rtn(double16); short16 __ovld __cnfn convert_short16_rtp(double16); short16 __ovld __cnfn convert_short16_rtz(double16); short16 __ovld __cnfn convert_short16_sat(double16); short16 __ovld __cnfn convert_short16_sat_rte(double16); short16 __ovld __cnfn convert_short16_sat_rtn(double16); short16 __ovld __cnfn convert_short16_sat_rtp(double16); short16 __ovld __cnfn convert_short16_sat_rtz(double16); ushort __ovld __cnfn convert_ushort(double); ushort __ovld __cnfn convert_ushort_rte(double); ushort __ovld __cnfn convert_ushort_rtn(double); ushort __ovld __cnfn convert_ushort_rtp(double); ushort __ovld __cnfn convert_ushort_rtz(double); ushort __ovld __cnfn convert_ushort_sat(double); ushort __ovld __cnfn convert_ushort_sat_rte(double); ushort __ovld __cnfn convert_ushort_sat_rtn(double); ushort __ovld __cnfn convert_ushort_sat_rtp(double); ushort __ovld __cnfn convert_ushort_sat_rtz(double); ushort2 __ovld __cnfn convert_ushort2(double2); ushort2 __ovld __cnfn convert_ushort2_rte(double2); ushort2 __ovld __cnfn convert_ushort2_rtn(double2); ushort2 __ovld __cnfn 
convert_ushort2_rtp(double2); ushort2 __ovld __cnfn convert_ushort2_rtz(double2); ushort2 __ovld __cnfn convert_ushort2_sat(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2); ushort3 __ovld __cnfn convert_ushort3(double3); ushort3 __ovld __cnfn convert_ushort3_rte(double3); ushort3 __ovld __cnfn convert_ushort3_rtn(double3); ushort3 __ovld __cnfn convert_ushort3_rtp(double3); ushort3 __ovld __cnfn convert_ushort3_rtz(double3); ushort3 __ovld __cnfn convert_ushort3_sat(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3); ushort4 __ovld __cnfn convert_ushort4(double4); ushort4 __ovld __cnfn convert_ushort4_rte(double4); ushort4 __ovld __cnfn convert_ushort4_rtn(double4); ushort4 __ovld __cnfn convert_ushort4_rtp(double4); ushort4 __ovld __cnfn convert_ushort4_rtz(double4); ushort4 __ovld __cnfn convert_ushort4_sat(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4); ushort8 __ovld __cnfn convert_ushort8(double8); ushort8 __ovld __cnfn convert_ushort8_rte(double8); ushort8 __ovld __cnfn convert_ushort8_rtn(double8); ushort8 __ovld __cnfn convert_ushort8_rtp(double8); ushort8 __ovld __cnfn convert_ushort8_rtz(double8); ushort8 __ovld __cnfn convert_ushort8_sat(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8); ushort16 __ovld __cnfn 
convert_ushort16(double16); ushort16 __ovld __cnfn convert_ushort16_rte(double16); ushort16 __ovld __cnfn convert_ushort16_rtn(double16); ushort16 __ovld __cnfn convert_ushort16_rtp(double16); ushort16 __ovld __cnfn convert_ushort16_rtz(double16); ushort16 __ovld __cnfn convert_ushort16_sat(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16); ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16); int __ovld __cnfn convert_int(double); int __ovld __cnfn convert_int_rte(double); int __ovld __cnfn convert_int_rtn(double); int __ovld __cnfn convert_int_rtp(double); int __ovld __cnfn convert_int_rtz(double); int __ovld __cnfn convert_int_sat(double); int __ovld __cnfn convert_int_sat_rte(double); int __ovld __cnfn convert_int_sat_rtn(double); int __ovld __cnfn convert_int_sat_rtp(double); int __ovld __cnfn convert_int_sat_rtz(double); int2 __ovld __cnfn convert_int2(double2); int2 __ovld __cnfn convert_int2_rte(double2); int2 __ovld __cnfn convert_int2_rtn(double2); int2 __ovld __cnfn convert_int2_rtp(double2); int2 __ovld __cnfn convert_int2_rtz(double2); int2 __ovld __cnfn convert_int2_sat(double2); int2 __ovld __cnfn convert_int2_sat_rte(double2); int2 __ovld __cnfn convert_int2_sat_rtn(double2); int2 __ovld __cnfn convert_int2_sat_rtp(double2); int2 __ovld __cnfn convert_int2_sat_rtz(double2); int3 __ovld __cnfn convert_int3(double3); int3 __ovld __cnfn convert_int3_rte(double3); int3 __ovld __cnfn convert_int3_rtn(double3); int3 __ovld __cnfn convert_int3_rtp(double3); int3 __ovld __cnfn convert_int3_rtz(double3); int3 __ovld __cnfn convert_int3_sat(double3); int3 __ovld __cnfn convert_int3_sat_rte(double3); int3 __ovld __cnfn convert_int3_sat_rtn(double3); int3 __ovld __cnfn convert_int3_sat_rtp(double3); int3 __ovld __cnfn convert_int3_sat_rtz(double3); int4 __ovld __cnfn convert_int4(double4); int4 __ovld __cnfn 
convert_int4_rte(double4); int4 __ovld __cnfn convert_int4_rtn(double4); int4 __ovld __cnfn convert_int4_rtp(double4); int4 __ovld __cnfn convert_int4_rtz(double4); int4 __ovld __cnfn convert_int4_sat(double4); int4 __ovld __cnfn convert_int4_sat_rte(double4); int4 __ovld __cnfn convert_int4_sat_rtn(double4); int4 __ovld __cnfn convert_int4_sat_rtp(double4); int4 __ovld __cnfn convert_int4_sat_rtz(double4); int8 __ovld __cnfn convert_int8(double8); int8 __ovld __cnfn convert_int8_rte(double8); int8 __ovld __cnfn convert_int8_rtn(double8); int8 __ovld __cnfn convert_int8_rtp(double8); int8 __ovld __cnfn convert_int8_rtz(double8); int8 __ovld __cnfn convert_int8_sat(double8); int8 __ovld __cnfn convert_int8_sat_rte(double8); int8 __ovld __cnfn convert_int8_sat_rtn(double8); int8 __ovld __cnfn convert_int8_sat_rtp(double8); int8 __ovld __cnfn convert_int8_sat_rtz(double8); int16 __ovld __cnfn convert_int16(double16); int16 __ovld __cnfn convert_int16_rte(double16); int16 __ovld __cnfn convert_int16_rtn(double16); int16 __ovld __cnfn convert_int16_rtp(double16); int16 __ovld __cnfn convert_int16_rtz(double16); int16 __ovld __cnfn convert_int16_sat(double16); int16 __ovld __cnfn convert_int16_sat_rte(double16); int16 __ovld __cnfn convert_int16_sat_rtn(double16); int16 __ovld __cnfn convert_int16_sat_rtp(double16); int16 __ovld __cnfn convert_int16_sat_rtz(double16); uint __ovld __cnfn convert_uint(double); uint __ovld __cnfn convert_uint_rte(double); uint __ovld __cnfn convert_uint_rtn(double); uint __ovld __cnfn convert_uint_rtp(double); uint __ovld __cnfn convert_uint_rtz(double); uint __ovld __cnfn convert_uint_sat(double); uint __ovld __cnfn convert_uint_sat_rte(double); uint __ovld __cnfn convert_uint_sat_rtn(double); uint __ovld __cnfn convert_uint_sat_rtp(double); uint __ovld __cnfn convert_uint_sat_rtz(double); uint2 __ovld __cnfn convert_uint2(double2); uint2 __ovld __cnfn convert_uint2_rte(double2); uint2 __ovld __cnfn convert_uint2_rtn(double2); uint2 __ovld 
__cnfn convert_uint2_rtp(double2); uint2 __ovld __cnfn convert_uint2_rtz(double2); uint2 __ovld __cnfn convert_uint2_sat(double2); uint2 __ovld __cnfn convert_uint2_sat_rte(double2); uint2 __ovld __cnfn convert_uint2_sat_rtn(double2); uint2 __ovld __cnfn convert_uint2_sat_rtp(double2); uint2 __ovld __cnfn convert_uint2_sat_rtz(double2); uint3 __ovld __cnfn convert_uint3(double3); uint3 __ovld __cnfn convert_uint3_rte(double3); uint3 __ovld __cnfn convert_uint3_rtn(double3); uint3 __ovld __cnfn convert_uint3_rtp(double3); uint3 __ovld __cnfn convert_uint3_rtz(double3); uint3 __ovld __cnfn convert_uint3_sat(double3); uint3 __ovld __cnfn convert_uint3_sat_rte(double3); uint3 __ovld __cnfn convert_uint3_sat_rtn(double3); uint3 __ovld __cnfn convert_uint3_sat_rtp(double3); uint3 __ovld __cnfn convert_uint3_sat_rtz(double3); uint4 __ovld __cnfn convert_uint4(double4); uint4 __ovld __cnfn convert_uint4_rte(double4); uint4 __ovld __cnfn convert_uint4_rtn(double4); uint4 __ovld __cnfn convert_uint4_rtp(double4); uint4 __ovld __cnfn convert_uint4_rtz(double4); uint4 __ovld __cnfn convert_uint4_sat(double4); uint4 __ovld __cnfn convert_uint4_sat_rte(double4); uint4 __ovld __cnfn convert_uint4_sat_rtn(double4); uint4 __ovld __cnfn convert_uint4_sat_rtp(double4); uint4 __ovld __cnfn convert_uint4_sat_rtz(double4); uint8 __ovld __cnfn convert_uint8(double8); uint8 __ovld __cnfn convert_uint8_rte(double8); uint8 __ovld __cnfn convert_uint8_rtn(double8); uint8 __ovld __cnfn convert_uint8_rtp(double8); uint8 __ovld __cnfn convert_uint8_rtz(double8); uint8 __ovld __cnfn convert_uint8_sat(double8); uint8 __ovld __cnfn convert_uint8_sat_rte(double8); uint8 __ovld __cnfn convert_uint8_sat_rtn(double8); uint8 __ovld __cnfn convert_uint8_sat_rtp(double8); uint8 __ovld __cnfn convert_uint8_sat_rtz(double8); uint16 __ovld __cnfn convert_uint16(double16); uint16 __ovld __cnfn convert_uint16_rte(double16); uint16 __ovld __cnfn convert_uint16_rtn(double16); uint16 __ovld __cnfn 
convert_uint16_rtp(double16); uint16 __ovld __cnfn convert_uint16_rtz(double16); uint16 __ovld __cnfn convert_uint16_sat(double16); uint16 __ovld __cnfn convert_uint16_sat_rte(double16); uint16 __ovld __cnfn convert_uint16_sat_rtn(double16); uint16 __ovld __cnfn convert_uint16_sat_rtp(double16); uint16 __ovld __cnfn convert_uint16_sat_rtz(double16); long __ovld __cnfn convert_long(double); long __ovld __cnfn convert_long_rte(double); long __ovld __cnfn convert_long_rtn(double); long __ovld __cnfn convert_long_rtp(double); long __ovld __cnfn convert_long_rtz(double); long __ovld __cnfn convert_long_sat(double); long __ovld __cnfn convert_long_sat_rte(double); long __ovld __cnfn convert_long_sat_rtn(double); long __ovld __cnfn convert_long_sat_rtp(double); long __ovld __cnfn convert_long_sat_rtz(double); long2 __ovld __cnfn convert_long2(double2); long2 __ovld __cnfn convert_long2_rte(double2); long2 __ovld __cnfn convert_long2_rtn(double2); long2 __ovld __cnfn convert_long2_rtp(double2); long2 __ovld __cnfn convert_long2_rtz(double2); long2 __ovld __cnfn convert_long2_sat(double2); long2 __ovld __cnfn convert_long2_sat_rte(double2); long2 __ovld __cnfn convert_long2_sat_rtn(double2); long2 __ovld __cnfn convert_long2_sat_rtp(double2); long2 __ovld __cnfn convert_long2_sat_rtz(double2); long3 __ovld __cnfn convert_long3(double3); long3 __ovld __cnfn convert_long3_rte(double3); long3 __ovld __cnfn convert_long3_rtn(double3); long3 __ovld __cnfn convert_long3_rtp(double3); long3 __ovld __cnfn convert_long3_rtz(double3); long3 __ovld __cnfn convert_long3_sat(double3); long3 __ovld __cnfn convert_long3_sat_rte(double3); long3 __ovld __cnfn convert_long3_sat_rtn(double3); long3 __ovld __cnfn convert_long3_sat_rtp(double3); long3 __ovld __cnfn convert_long3_sat_rtz(double3); long4 __ovld __cnfn convert_long4(double4); long4 __ovld __cnfn convert_long4_rte(double4); long4 __ovld __cnfn convert_long4_rtn(double4); long4 __ovld __cnfn convert_long4_rtp(double4); long4 __ovld 
__cnfn convert_long4_rtz(double4); long4 __ovld __cnfn convert_long4_sat(double4); long4 __ovld __cnfn convert_long4_sat_rte(double4); long4 __ovld __cnfn convert_long4_sat_rtn(double4); long4 __ovld __cnfn convert_long4_sat_rtp(double4); long4 __ovld __cnfn convert_long4_sat_rtz(double4); long8 __ovld __cnfn convert_long8(double8); long8 __ovld __cnfn convert_long8_rte(double8); long8 __ovld __cnfn convert_long8_rtn(double8); long8 __ovld __cnfn convert_long8_rtp(double8); long8 __ovld __cnfn convert_long8_rtz(double8); long8 __ovld __cnfn convert_long8_sat(double8); long8 __ovld __cnfn convert_long8_sat_rte(double8); long8 __ovld __cnfn convert_long8_sat_rtn(double8); long8 __ovld __cnfn convert_long8_sat_rtp(double8); long8 __ovld __cnfn convert_long8_sat_rtz(double8); long16 __ovld __cnfn convert_long16(double16); long16 __ovld __cnfn convert_long16_rte(double16); long16 __ovld __cnfn convert_long16_rtn(double16); long16 __ovld __cnfn convert_long16_rtp(double16); long16 __ovld __cnfn convert_long16_rtz(double16); long16 __ovld __cnfn convert_long16_sat(double16); long16 __ovld __cnfn convert_long16_sat_rte(double16); long16 __ovld __cnfn convert_long16_sat_rtn(double16); long16 __ovld __cnfn convert_long16_sat_rtp(double16); long16 __ovld __cnfn convert_long16_sat_rtz(double16); ulong __ovld __cnfn convert_ulong(double); ulong __ovld __cnfn convert_ulong_rte(double); ulong __ovld __cnfn convert_ulong_rtn(double); ulong __ovld __cnfn convert_ulong_rtp(double); ulong __ovld __cnfn convert_ulong_rtz(double); ulong __ovld __cnfn convert_ulong_sat(double); ulong __ovld __cnfn convert_ulong_sat_rte(double); ulong __ovld __cnfn convert_ulong_sat_rtn(double); ulong __ovld __cnfn convert_ulong_sat_rtp(double); ulong __ovld __cnfn convert_ulong_sat_rtz(double); ulong2 __ovld __cnfn convert_ulong2(double2); ulong2 __ovld __cnfn convert_ulong2_rte(double2); ulong2 __ovld __cnfn convert_ulong2_rtn(double2); ulong2 __ovld __cnfn convert_ulong2_rtp(double2); ulong2 __ovld 
__cnfn convert_ulong2_rtz(double2); ulong2 __ovld __cnfn convert_ulong2_sat(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2); ulong3 __ovld __cnfn convert_ulong3(double3); ulong3 __ovld __cnfn convert_ulong3_rte(double3); ulong3 __ovld __cnfn convert_ulong3_rtn(double3); ulong3 __ovld __cnfn convert_ulong3_rtp(double3); ulong3 __ovld __cnfn convert_ulong3_rtz(double3); ulong3 __ovld __cnfn convert_ulong3_sat(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3); ulong4 __ovld __cnfn convert_ulong4(double4); ulong4 __ovld __cnfn convert_ulong4_rte(double4); ulong4 __ovld __cnfn convert_ulong4_rtn(double4); ulong4 __ovld __cnfn convert_ulong4_rtp(double4); ulong4 __ovld __cnfn convert_ulong4_rtz(double4); ulong4 __ovld __cnfn convert_ulong4_sat(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4); ulong8 __ovld __cnfn convert_ulong8(double8); ulong8 __ovld __cnfn convert_ulong8_rte(double8); ulong8 __ovld __cnfn convert_ulong8_rtn(double8); ulong8 __ovld __cnfn convert_ulong8_rtp(double8); ulong8 __ovld __cnfn convert_ulong8_rtz(double8); ulong8 __ovld __cnfn convert_ulong8_sat(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8); ulong16 __ovld __cnfn convert_ulong16(double16); ulong16 __ovld __cnfn convert_ulong16_rte(double16); ulong16 __ovld __cnfn convert_ulong16_rtn(double16); 
ulong16 __ovld __cnfn convert_ulong16_rtp(double16); ulong16 __ovld __cnfn convert_ulong16_rtz(double16); ulong16 __ovld __cnfn convert_ulong16_sat(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16); float __ovld __cnfn convert_float(double); float __ovld __cnfn convert_float_rte(double); float __ovld __cnfn convert_float_rtn(double); float __ovld __cnfn convert_float_rtp(double); float __ovld __cnfn convert_float_rtz(double); float2 __ovld __cnfn convert_float2(double2); float2 __ovld __cnfn convert_float2_rte(double2); float2 __ovld __cnfn convert_float2_rtn(double2); float2 __ovld __cnfn convert_float2_rtp(double2); float2 __ovld __cnfn convert_float2_rtz(double2); float3 __ovld __cnfn convert_float3(double3); float3 __ovld __cnfn convert_float3_rte(double3); float3 __ovld __cnfn convert_float3_rtn(double3); float3 __ovld __cnfn convert_float3_rtp(double3); float3 __ovld __cnfn convert_float3_rtz(double3); float4 __ovld __cnfn convert_float4(double4); float4 __ovld __cnfn convert_float4_rte(double4); float4 __ovld __cnfn convert_float4_rtn(double4); float4 __ovld __cnfn convert_float4_rtp(double4); float4 __ovld __cnfn convert_float4_rtz(double4); float8 __ovld __cnfn convert_float8(double8); float8 __ovld __cnfn convert_float8_rte(double8); float8 __ovld __cnfn convert_float8_rtn(double8); float8 __ovld __cnfn convert_float8_rtp(double8); float8 __ovld __cnfn convert_float8_rtz(double8); float16 __ovld __cnfn convert_float16(double16); float16 __ovld __cnfn convert_float16_rte(double16); float16 __ovld __cnfn convert_float16_rtn(double16); float16 __ovld __cnfn convert_float16_rtp(double16); float16 __ovld __cnfn convert_float16_rtz(double16); double __ovld __cnfn convert_double(char); double __ovld __cnfn convert_double(double); double __ovld __cnfn convert_double(float); 
double __ovld __cnfn convert_double(int); double __ovld __cnfn convert_double(long); double __ovld __cnfn convert_double(short); double __ovld __cnfn convert_double(uchar); double __ovld __cnfn convert_double(uint); double __ovld __cnfn convert_double(ulong); double __ovld __cnfn convert_double(ushort); double __ovld __cnfn convert_double_rte(char); double __ovld __cnfn convert_double_rte(double); double __ovld __cnfn convert_double_rte(float); double __ovld __cnfn convert_double_rte(int); double __ovld __cnfn convert_double_rte(long); double __ovld __cnfn convert_double_rte(short); double __ovld __cnfn convert_double_rte(uchar); double __ovld __cnfn convert_double_rte(uint); double __ovld __cnfn convert_double_rte(ulong); double __ovld __cnfn convert_double_rte(ushort); double __ovld __cnfn convert_double_rtn(char); double __ovld __cnfn convert_double_rtn(double); double __ovld __cnfn convert_double_rtn(float); double __ovld __cnfn convert_double_rtn(int); double __ovld __cnfn convert_double_rtn(long); double __ovld __cnfn convert_double_rtn(short); double __ovld __cnfn convert_double_rtn(uchar); double __ovld __cnfn convert_double_rtn(uint); double __ovld __cnfn convert_double_rtn(ulong); double __ovld __cnfn convert_double_rtn(ushort); double __ovld __cnfn convert_double_rtp(char); double __ovld __cnfn convert_double_rtp(double); double __ovld __cnfn convert_double_rtp(float); double __ovld __cnfn convert_double_rtp(int); double __ovld __cnfn convert_double_rtp(long); double __ovld __cnfn convert_double_rtp(short); double __ovld __cnfn convert_double_rtp(uchar); double __ovld __cnfn convert_double_rtp(uint); double __ovld __cnfn convert_double_rtp(ulong); double __ovld __cnfn convert_double_rtp(ushort); double __ovld __cnfn convert_double_rtz(char); double __ovld __cnfn convert_double_rtz(double); double __ovld __cnfn convert_double_rtz(float); double __ovld __cnfn convert_double_rtz(int); double __ovld __cnfn convert_double_rtz(long); double __ovld __cnfn 
convert_double_rtz(short); double __ovld __cnfn convert_double_rtz(uchar); double __ovld __cnfn convert_double_rtz(uint); double __ovld __cnfn convert_double_rtz(ulong); double __ovld __cnfn convert_double_rtz(ushort); double2 __ovld __cnfn convert_double2(char2); double2 __ovld __cnfn convert_double2(double2); double2 __ovld __cnfn convert_double2(float2); double2 __ovld __cnfn convert_double2(int2); double2 __ovld __cnfn convert_double2(long2); double2 __ovld __cnfn convert_double2(short2); double2 __ovld __cnfn convert_double2(uchar2); double2 __ovld __cnfn convert_double2(uint2); double2 __ovld __cnfn convert_double2(ulong2); double2 __ovld __cnfn convert_double2(ushort2); double2 __ovld __cnfn convert_double2_rte(char2); double2 __ovld __cnfn convert_double2_rte(double2); double2 __ovld __cnfn convert_double2_rte(float2); double2 __ovld __cnfn convert_double2_rte(int2); double2 __ovld __cnfn convert_double2_rte(long2); double2 __ovld __cnfn convert_double2_rte(short2); double2 __ovld __cnfn convert_double2_rte(uchar2); double2 __ovld __cnfn convert_double2_rte(uint2); double2 __ovld __cnfn convert_double2_rte(ulong2); double2 __ovld __cnfn convert_double2_rte(ushort2); double2 __ovld __cnfn convert_double2_rtn(char2); double2 __ovld __cnfn convert_double2_rtn(double2); double2 __ovld __cnfn convert_double2_rtn(float2); double2 __ovld __cnfn convert_double2_rtn(int2); double2 __ovld __cnfn convert_double2_rtn(long2); double2 __ovld __cnfn convert_double2_rtn(short2); double2 __ovld __cnfn convert_double2_rtn(uchar2); double2 __ovld __cnfn convert_double2_rtn(uint2); double2 __ovld __cnfn convert_double2_rtn(ulong2); double2 __ovld __cnfn convert_double2_rtn(ushort2); double2 __ovld __cnfn convert_double2_rtp(char2); double2 __ovld __cnfn convert_double2_rtp(double2); double2 __ovld __cnfn convert_double2_rtp(float2); double2 __ovld __cnfn convert_double2_rtp(int2); double2 __ovld __cnfn convert_double2_rtp(long2); double2 __ovld __cnfn 
convert_double2_rtp(short2); double2 __ovld __cnfn convert_double2_rtp(uchar2); double2 __ovld __cnfn convert_double2_rtp(uint2); double2 __ovld __cnfn convert_double2_rtp(ulong2); double2 __ovld __cnfn convert_double2_rtp(ushort2); double2 __ovld __cnfn convert_double2_rtz(char2); double2 __ovld __cnfn convert_double2_rtz(double2); double2 __ovld __cnfn convert_double2_rtz(float2); double2 __ovld __cnfn convert_double2_rtz(int2); double2 __ovld __cnfn convert_double2_rtz(long2); double2 __ovld __cnfn convert_double2_rtz(short2); double2 __ovld __cnfn convert_double2_rtz(uchar2); double2 __ovld __cnfn convert_double2_rtz(uint2); double2 __ovld __cnfn convert_double2_rtz(ulong2); double2 __ovld __cnfn convert_double2_rtz(ushort2); double3 __ovld __cnfn convert_double3(char3); double3 __ovld __cnfn convert_double3(double3); double3 __ovld __cnfn convert_double3(float3); double3 __ovld __cnfn convert_double3(int3); double3 __ovld __cnfn convert_double3(long3); double3 __ovld __cnfn convert_double3(short3); double3 __ovld __cnfn convert_double3(uchar3); double3 __ovld __cnfn convert_double3(uint3); double3 __ovld __cnfn convert_double3(ulong3); double3 __ovld __cnfn convert_double3(ushort3); double3 __ovld __cnfn convert_double3_rte(char3); double3 __ovld __cnfn convert_double3_rte(double3); double3 __ovld __cnfn convert_double3_rte(float3); double3 __ovld __cnfn convert_double3_rte(int3); double3 __ovld __cnfn convert_double3_rte(long3); double3 __ovld __cnfn convert_double3_rte(short3); double3 __ovld __cnfn convert_double3_rte(uchar3); double3 __ovld __cnfn convert_double3_rte(uint3); double3 __ovld __cnfn convert_double3_rte(ulong3); double3 __ovld __cnfn convert_double3_rte(ushort3); double3 __ovld __cnfn convert_double3_rtn(char3); double3 __ovld __cnfn convert_double3_rtn(double3); double3 __ovld __cnfn convert_double3_rtn(float3); double3 __ovld __cnfn convert_double3_rtn(int3); double3 __ovld __cnfn convert_double3_rtn(long3); double3 __ovld __cnfn 
convert_double3_rtn(short3); double3 __ovld __cnfn convert_double3_rtn(uchar3); double3 __ovld __cnfn convert_double3_rtn(uint3); double3 __ovld __cnfn convert_double3_rtn(ulong3); double3 __ovld __cnfn convert_double3_rtn(ushort3); double3 __ovld __cnfn convert_double3_rtp(char3); double3 __ovld __cnfn convert_double3_rtp(double3); double3 __ovld __cnfn convert_double3_rtp(float3); double3 __ovld __cnfn convert_double3_rtp(int3); double3 __ovld __cnfn convert_double3_rtp(long3); double3 __ovld __cnfn convert_double3_rtp(short3); double3 __ovld __cnfn convert_double3_rtp(uchar3); double3 __ovld __cnfn convert_double3_rtp(uint3); double3 __ovld __cnfn convert_double3_rtp(ulong3); double3 __ovld __cnfn convert_double3_rtp(ushort3); double3 __ovld __cnfn convert_double3_rtz(char3); double3 __ovld __cnfn convert_double3_rtz(double3); double3 __ovld __cnfn convert_double3_rtz(float3); double3 __ovld __cnfn convert_double3_rtz(int3); double3 __ovld __cnfn convert_double3_rtz(long3); double3 __ovld __cnfn convert_double3_rtz(short3); double3 __ovld __cnfn convert_double3_rtz(uchar3); double3 __ovld __cnfn convert_double3_rtz(uint3); double3 __ovld __cnfn convert_double3_rtz(ulong3); double3 __ovld __cnfn convert_double3_rtz(ushort3); double4 __ovld __cnfn convert_double4(char4); double4 __ovld __cnfn convert_double4(double4); double4 __ovld __cnfn convert_double4(float4); double4 __ovld __cnfn convert_double4(int4); double4 __ovld __cnfn convert_double4(long4); double4 __ovld __cnfn convert_double4(short4); double4 __ovld __cnfn convert_double4(uchar4); double4 __ovld __cnfn convert_double4(uint4); double4 __ovld __cnfn convert_double4(ulong4); double4 __ovld __cnfn convert_double4(ushort4); double4 __ovld __cnfn convert_double4_rte(char4); double4 __ovld __cnfn convert_double4_rte(double4); double4 __ovld __cnfn convert_double4_rte(float4); double4 __ovld __cnfn convert_double4_rte(int4); double4 __ovld __cnfn convert_double4_rte(long4); double4 __ovld __cnfn 
convert_double4_rte(short4); double4 __ovld __cnfn convert_double4_rte(uchar4); double4 __ovld __cnfn convert_double4_rte(uint4); double4 __ovld __cnfn convert_double4_rte(ulong4); double4 __ovld __cnfn convert_double4_rte(ushort4); double4 __ovld __cnfn convert_double4_rtn(char4); double4 __ovld __cnfn convert_double4_rtn(double4); double4 __ovld __cnfn convert_double4_rtn(float4); double4 __ovld __cnfn convert_double4_rtn(int4); double4 __ovld __cnfn convert_double4_rtn(long4); double4 __ovld __cnfn convert_double4_rtn(short4); double4 __ovld __cnfn convert_double4_rtn(uchar4); double4 __ovld __cnfn convert_double4_rtn(uint4); double4 __ovld __cnfn convert_double4_rtn(ulong4); double4 __ovld __cnfn convert_double4_rtn(ushort4); double4 __ovld __cnfn convert_double4_rtp(char4); double4 __ovld __cnfn convert_double4_rtp(double4); double4 __ovld __cnfn convert_double4_rtp(float4); double4 __ovld __cnfn convert_double4_rtp(int4); double4 __ovld __cnfn convert_double4_rtp(long4); double4 __ovld __cnfn convert_double4_rtp(short4); double4 __ovld __cnfn convert_double4_rtp(uchar4); double4 __ovld __cnfn convert_double4_rtp(uint4); double4 __ovld __cnfn convert_double4_rtp(ulong4); double4 __ovld __cnfn convert_double4_rtp(ushort4); double4 __ovld __cnfn convert_double4_rtz(char4); double4 __ovld __cnfn convert_double4_rtz(double4); double4 __ovld __cnfn convert_double4_rtz(float4); double4 __ovld __cnfn convert_double4_rtz(int4); double4 __ovld __cnfn convert_double4_rtz(long4); double4 __ovld __cnfn convert_double4_rtz(short4); double4 __ovld __cnfn convert_double4_rtz(uchar4); double4 __ovld __cnfn convert_double4_rtz(uint4); double4 __ovld __cnfn convert_double4_rtz(ulong4); double4 __ovld __cnfn convert_double4_rtz(ushort4); double8 __ovld __cnfn convert_double8(char8); double8 __ovld __cnfn convert_double8(double8); double8 __ovld __cnfn convert_double8(float8); double8 __ovld __cnfn convert_double8(int8); double8 __ovld __cnfn convert_double8(long8); double8 
__ovld __cnfn convert_double8(short8); double8 __ovld __cnfn convert_double8(uchar8); double8 __ovld __cnfn convert_double8(uint8); double8 __ovld __cnfn convert_double8(ulong8); double8 __ovld __cnfn convert_double8(ushort8); double8 __ovld __cnfn convert_double8_rte(char8); double8 __ovld __cnfn convert_double8_rte(double8); double8 __ovld __cnfn convert_double8_rte(float8); double8 __ovld __cnfn convert_double8_rte(int8); double8 __ovld __cnfn convert_double8_rte(long8); double8 __ovld __cnfn convert_double8_rte(short8); double8 __ovld __cnfn convert_double8_rte(uchar8); double8 __ovld __cnfn convert_double8_rte(uint8); double8 __ovld __cnfn convert_double8_rte(ulong8); double8 __ovld __cnfn convert_double8_rte(ushort8); double8 __ovld __cnfn convert_double8_rtn(char8); double8 __ovld __cnfn convert_double8_rtn(double8); double8 __ovld __cnfn convert_double8_rtn(float8); double8 __ovld __cnfn convert_double8_rtn(int8); double8 __ovld __cnfn convert_double8_rtn(long8); double8 __ovld __cnfn convert_double8_rtn(short8); double8 __ovld __cnfn convert_double8_rtn(uchar8); double8 __ovld __cnfn convert_double8_rtn(uint8); double8 __ovld __cnfn convert_double8_rtn(ulong8); double8 __ovld __cnfn convert_double8_rtn(ushort8); double8 __ovld __cnfn convert_double8_rtp(char8); double8 __ovld __cnfn convert_double8_rtp(double8); double8 __ovld __cnfn convert_double8_rtp(float8); double8 __ovld __cnfn convert_double8_rtp(int8); double8 __ovld __cnfn convert_double8_rtp(long8); double8 __ovld __cnfn convert_double8_rtp(short8); double8 __ovld __cnfn convert_double8_rtp(uchar8); double8 __ovld __cnfn convert_double8_rtp(uint8); double8 __ovld __cnfn convert_double8_rtp(ulong8); double8 __ovld __cnfn convert_double8_rtp(ushort8); double8 __ovld __cnfn convert_double8_rtz(char8); double8 __ovld __cnfn convert_double8_rtz(double8); double8 __ovld __cnfn convert_double8_rtz(float8); double8 __ovld __cnfn convert_double8_rtz(int8); double8 __ovld __cnfn convert_double8_rtz(long8); 
double8 __ovld __cnfn convert_double8_rtz(short8); double8 __ovld __cnfn convert_double8_rtz(uchar8); double8 __ovld __cnfn convert_double8_rtz(uint8); double8 __ovld __cnfn convert_double8_rtz(ulong8); double8 __ovld __cnfn convert_double8_rtz(ushort8); double16 __ovld __cnfn convert_double16(char16); double16 __ovld __cnfn convert_double16(double16); double16 __ovld __cnfn convert_double16(float16); double16 __ovld __cnfn convert_double16(int16); double16 __ovld __cnfn convert_double16(long16); double16 __ovld __cnfn convert_double16(short16); double16 __ovld __cnfn convert_double16(uchar16); double16 __ovld __cnfn convert_double16(uint16); double16 __ovld __cnfn convert_double16(ulong16); double16 __ovld __cnfn convert_double16(ushort16); double16 __ovld __cnfn convert_double16_rte(char16); double16 __ovld __cnfn convert_double16_rte(double16); double16 __ovld __cnfn convert_double16_rte(float16); double16 __ovld __cnfn convert_double16_rte(int16); double16 __ovld __cnfn convert_double16_rte(long16); double16 __ovld __cnfn convert_double16_rte(short16); double16 __ovld __cnfn convert_double16_rte(uchar16); double16 __ovld __cnfn convert_double16_rte(uint16); double16 __ovld __cnfn convert_double16_rte(ulong16); double16 __ovld __cnfn convert_double16_rte(ushort16); double16 __ovld __cnfn convert_double16_rtn(char16); double16 __ovld __cnfn convert_double16_rtn(double16); double16 __ovld __cnfn convert_double16_rtn(float16); double16 __ovld __cnfn convert_double16_rtn(int16); double16 __ovld __cnfn convert_double16_rtn(long16); double16 __ovld __cnfn convert_double16_rtn(short16); double16 __ovld __cnfn convert_double16_rtn(uchar16); double16 __ovld __cnfn convert_double16_rtn(uint16); double16 __ovld __cnfn convert_double16_rtn(ulong16); double16 __ovld __cnfn convert_double16_rtn(ushort16); double16 __ovld __cnfn convert_double16_rtp(char16); double16 __ovld __cnfn convert_double16_rtp(double16); double16 __ovld __cnfn convert_double16_rtp(float16); double16 
__ovld __cnfn convert_double16_rtp(int16); double16 __ovld __cnfn convert_double16_rtp(long16); double16 __ovld __cnfn convert_double16_rtp(short16); double16 __ovld __cnfn convert_double16_rtp(uchar16); double16 __ovld __cnfn convert_double16_rtp(uint16); double16 __ovld __cnfn convert_double16_rtp(ulong16); double16 __ovld __cnfn convert_double16_rtp(ushort16); double16 __ovld __cnfn convert_double16_rtz(char16); double16 __ovld __cnfn convert_double16_rtz(double16); double16 __ovld __cnfn convert_double16_rtz(float16); double16 __ovld __cnfn convert_double16_rtz(int16); double16 __ovld __cnfn convert_double16_rtz(long16); double16 __ovld __cnfn convert_double16_rtz(short16); double16 __ovld __cnfn convert_double16_rtz(uchar16); double16 __ovld __cnfn convert_double16_rtz(uint16); double16 __ovld __cnfn convert_double16_rtz(ulong16); double16 __ovld __cnfn convert_double16_rtz(ushort16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable // Convert half types to non-double types. 
uchar __ovld __cnfn convert_uchar(half); uchar __ovld __cnfn convert_uchar_rte(half); uchar __ovld __cnfn convert_uchar_rtp(half); uchar __ovld __cnfn convert_uchar_rtn(half); uchar __ovld __cnfn convert_uchar_rtz(half); uchar __ovld __cnfn convert_uchar_sat(half); uchar __ovld __cnfn convert_uchar_sat_rte(half); uchar __ovld __cnfn convert_uchar_sat_rtp(half); uchar __ovld __cnfn convert_uchar_sat_rtn(half); uchar __ovld __cnfn convert_uchar_sat_rtz(half); uchar2 __ovld __cnfn convert_uchar2(half2); uchar2 __ovld __cnfn convert_uchar2_rte(half2); uchar2 __ovld __cnfn convert_uchar2_rtp(half2); uchar2 __ovld __cnfn convert_uchar2_rtn(half2); uchar2 __ovld __cnfn convert_uchar2_rtz(half2); uchar2 __ovld __cnfn convert_uchar2_sat(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2); uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2); uchar3 __ovld __cnfn convert_uchar3(half3); uchar3 __ovld __cnfn convert_uchar3_rte(half3); uchar3 __ovld __cnfn convert_uchar3_rtp(half3); uchar3 __ovld __cnfn convert_uchar3_rtn(half3); uchar3 __ovld __cnfn convert_uchar3_rtz(half3); uchar3 __ovld __cnfn convert_uchar3_sat(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3); uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3); uchar4 __ovld __cnfn convert_uchar4(half4); uchar4 __ovld __cnfn convert_uchar4_rte(half4); uchar4 __ovld __cnfn convert_uchar4_rtp(half4); uchar4 __ovld __cnfn convert_uchar4_rtn(half4); uchar4 __ovld __cnfn convert_uchar4_rtz(half4); uchar4 __ovld __cnfn convert_uchar4_sat(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4); uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4); uchar8 __ovld __cnfn convert_uchar8(half8); uchar8 __ovld 
__cnfn convert_uchar8_rte(half8); uchar8 __ovld __cnfn convert_uchar8_rtp(half8); uchar8 __ovld __cnfn convert_uchar8_rtn(half8); uchar8 __ovld __cnfn convert_uchar8_rtz(half8); uchar8 __ovld __cnfn convert_uchar8_sat(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8); uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8); uchar16 __ovld __cnfn convert_uchar16(half16); uchar16 __ovld __cnfn convert_uchar16_rte(half16); uchar16 __ovld __cnfn convert_uchar16_rtp(half16); uchar16 __ovld __cnfn convert_uchar16_rtn(half16); uchar16 __ovld __cnfn convert_uchar16_rtz(half16); uchar16 __ovld __cnfn convert_uchar16_sat(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16); uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16); ushort __ovld __cnfn convert_ushort(half); ushort __ovld __cnfn convert_ushort_rte(half); ushort __ovld __cnfn convert_ushort_rtp(half); ushort __ovld __cnfn convert_ushort_rtn(half); ushort __ovld __cnfn convert_ushort_rtz(half); ushort __ovld __cnfn convert_ushort_sat(half); ushort __ovld __cnfn convert_ushort_sat_rte(half); ushort __ovld __cnfn convert_ushort_sat_rtp(half); ushort __ovld __cnfn convert_ushort_sat_rtn(half); ushort __ovld __cnfn convert_ushort_sat_rtz(half); ushort2 __ovld __cnfn convert_ushort2(half2); ushort2 __ovld __cnfn convert_ushort2_rte(half2); ushort2 __ovld __cnfn convert_ushort2_rtp(half2); ushort2 __ovld __cnfn convert_ushort2_rtn(half2); ushort2 __ovld __cnfn convert_ushort2_rtz(half2); ushort2 __ovld __cnfn convert_ushort2_sat(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2); ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2); ushort3 __ovld __cnfn convert_ushort3(half3); 
ushort3 __ovld __cnfn convert_ushort3_rte(half3); ushort3 __ovld __cnfn convert_ushort3_rtp(half3); ushort3 __ovld __cnfn convert_ushort3_rtn(half3); ushort3 __ovld __cnfn convert_ushort3_rtz(half3); ushort3 __ovld __cnfn convert_ushort3_sat(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3); ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3); ushort4 __ovld __cnfn convert_ushort4(half4); ushort4 __ovld __cnfn convert_ushort4_rte(half4); ushort4 __ovld __cnfn convert_ushort4_rtp(half4); ushort4 __ovld __cnfn convert_ushort4_rtn(half4); ushort4 __ovld __cnfn convert_ushort4_rtz(half4); ushort4 __ovld __cnfn convert_ushort4_sat(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4); ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4); ushort8 __ovld __cnfn convert_ushort8(half8); ushort8 __ovld __cnfn convert_ushort8_rte(half8); ushort8 __ovld __cnfn convert_ushort8_rtp(half8); ushort8 __ovld __cnfn convert_ushort8_rtn(half8); ushort8 __ovld __cnfn convert_ushort8_rtz(half8); ushort8 __ovld __cnfn convert_ushort8_sat(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8); ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8); ushort16 __ovld __cnfn convert_ushort16(half16); ushort16 __ovld __cnfn convert_ushort16_rte(half16); ushort16 __ovld __cnfn convert_ushort16_rtp(half16); ushort16 __ovld __cnfn convert_ushort16_rtn(half16); ushort16 __ovld __cnfn convert_ushort16_rtz(half16); ushort16 __ovld __cnfn convert_ushort16_sat(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16); ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16); ushort16 __ovld __cnfn 
convert_ushort16_sat_rtz(half16); uint __ovld __cnfn convert_uint(half); uint __ovld __cnfn convert_uint_rte(half); uint __ovld __cnfn convert_uint_rtp(half); uint __ovld __cnfn convert_uint_rtn(half); uint __ovld __cnfn convert_uint_rtz(half); uint __ovld __cnfn convert_uint_sat(half); uint __ovld __cnfn convert_uint_sat_rte(half); uint __ovld __cnfn convert_uint_sat_rtp(half); uint __ovld __cnfn convert_uint_sat_rtn(half); uint __ovld __cnfn convert_uint_sat_rtz(half); uint2 __ovld __cnfn convert_uint2(half2); uint2 __ovld __cnfn convert_uint2_rte(half2); uint2 __ovld __cnfn convert_uint2_rtp(half2); uint2 __ovld __cnfn convert_uint2_rtn(half2); uint2 __ovld __cnfn convert_uint2_rtz(half2); uint2 __ovld __cnfn convert_uint2_sat(half2); uint2 __ovld __cnfn convert_uint2_sat_rte(half2); uint2 __ovld __cnfn convert_uint2_sat_rtp(half2); uint2 __ovld __cnfn convert_uint2_sat_rtn(half2); uint2 __ovld __cnfn convert_uint2_sat_rtz(half2); uint3 __ovld __cnfn convert_uint3(half3); uint3 __ovld __cnfn convert_uint3_rte(half3); uint3 __ovld __cnfn convert_uint3_rtp(half3); uint3 __ovld __cnfn convert_uint3_rtn(half3); uint3 __ovld __cnfn convert_uint3_rtz(half3); uint3 __ovld __cnfn convert_uint3_sat(half3); uint3 __ovld __cnfn convert_uint3_sat_rte(half3); uint3 __ovld __cnfn convert_uint3_sat_rtp(half3); uint3 __ovld __cnfn convert_uint3_sat_rtn(half3); uint3 __ovld __cnfn convert_uint3_sat_rtz(half3); uint4 __ovld __cnfn convert_uint4(half4); uint4 __ovld __cnfn convert_uint4_rte(half4); uint4 __ovld __cnfn convert_uint4_rtp(half4); uint4 __ovld __cnfn convert_uint4_rtn(half4); uint4 __ovld __cnfn convert_uint4_rtz(half4); uint4 __ovld __cnfn convert_uint4_sat(half4); uint4 __ovld __cnfn convert_uint4_sat_rte(half4); uint4 __ovld __cnfn convert_uint4_sat_rtp(half4); uint4 __ovld __cnfn convert_uint4_sat_rtn(half4); uint4 __ovld __cnfn convert_uint4_sat_rtz(half4); uint8 __ovld __cnfn convert_uint8(half8); uint8 __ovld __cnfn convert_uint8_rte(half8); uint8 __ovld __cnfn 
convert_uint8_rtp(half8); uint8 __ovld __cnfn convert_uint8_rtn(half8); uint8 __ovld __cnfn convert_uint8_rtz(half8); uint8 __ovld __cnfn convert_uint8_sat(half8); uint8 __ovld __cnfn convert_uint8_sat_rte(half8); uint8 __ovld __cnfn convert_uint8_sat_rtp(half8); uint8 __ovld __cnfn convert_uint8_sat_rtn(half8); uint8 __ovld __cnfn convert_uint8_sat_rtz(half8); uint16 __ovld __cnfn convert_uint16(half16); uint16 __ovld __cnfn convert_uint16_rte(half16); uint16 __ovld __cnfn convert_uint16_rtp(half16); uint16 __ovld __cnfn convert_uint16_rtn(half16); uint16 __ovld __cnfn convert_uint16_rtz(half16); uint16 __ovld __cnfn convert_uint16_sat(half16); uint16 __ovld __cnfn convert_uint16_sat_rte(half16); uint16 __ovld __cnfn convert_uint16_sat_rtp(half16); uint16 __ovld __cnfn convert_uint16_sat_rtn(half16); uint16 __ovld __cnfn convert_uint16_sat_rtz(half16); ulong __ovld __cnfn convert_ulong(half); ulong __ovld __cnfn convert_ulong_rte(half); ulong __ovld __cnfn convert_ulong_rtp(half); ulong __ovld __cnfn convert_ulong_rtn(half); ulong __ovld __cnfn convert_ulong_rtz(half); ulong __ovld __cnfn convert_ulong_sat(half); ulong __ovld __cnfn convert_ulong_sat_rte(half); ulong __ovld __cnfn convert_ulong_sat_rtp(half); ulong __ovld __cnfn convert_ulong_sat_rtn(half); ulong __ovld __cnfn convert_ulong_sat_rtz(half); ulong2 __ovld __cnfn convert_ulong2(half2); ulong2 __ovld __cnfn convert_ulong2_rte(half2); ulong2 __ovld __cnfn convert_ulong2_rtp(half2); ulong2 __ovld __cnfn convert_ulong2_rtn(half2); ulong2 __ovld __cnfn convert_ulong2_rtz(half2); ulong2 __ovld __cnfn convert_ulong2_sat(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2); ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2); ulong3 __ovld __cnfn convert_ulong3(half3); ulong3 __ovld __cnfn convert_ulong3_rte(half3); ulong3 __ovld __cnfn convert_ulong3_rtp(half3); ulong3 __ovld __cnfn 
convert_ulong3_rtn(half3); ulong3 __ovld __cnfn convert_ulong3_rtz(half3); ulong3 __ovld __cnfn convert_ulong3_sat(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3); ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3); ulong4 __ovld __cnfn convert_ulong4(half4); ulong4 __ovld __cnfn convert_ulong4_rte(half4); ulong4 __ovld __cnfn convert_ulong4_rtp(half4); ulong4 __ovld __cnfn convert_ulong4_rtn(half4); ulong4 __ovld __cnfn convert_ulong4_rtz(half4); ulong4 __ovld __cnfn convert_ulong4_sat(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4); ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4); ulong8 __ovld __cnfn convert_ulong8(half8); ulong8 __ovld __cnfn convert_ulong8_rte(half8); ulong8 __ovld __cnfn convert_ulong8_rtp(half8); ulong8 __ovld __cnfn convert_ulong8_rtn(half8); ulong8 __ovld __cnfn convert_ulong8_rtz(half8); ulong8 __ovld __cnfn convert_ulong8_sat(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8); ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8); ulong16 __ovld __cnfn convert_ulong16(half16); ulong16 __ovld __cnfn convert_ulong16_rte(half16); ulong16 __ovld __cnfn convert_ulong16_rtp(half16); ulong16 __ovld __cnfn convert_ulong16_rtn(half16); ulong16 __ovld __cnfn convert_ulong16_rtz(half16); ulong16 __ovld __cnfn convert_ulong16_sat(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16); ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16); char __ovld __cnfn convert_char(half); char __ovld __cnfn convert_char_rte(half); char __ovld __cnfn convert_char_rtp(half); char __ovld __cnfn 
convert_char_rtn(half); char __ovld __cnfn convert_char_rtz(half); char __ovld __cnfn convert_char_sat(half); char __ovld __cnfn convert_char_sat_rte(half); char __ovld __cnfn convert_char_sat_rtp(half); char __ovld __cnfn convert_char_sat_rtn(half); char __ovld __cnfn convert_char_sat_rtz(half); char2 __ovld __cnfn convert_char2(half2); char2 __ovld __cnfn convert_char2_rte(half2); char2 __ovld __cnfn convert_char2_rtp(half2); char2 __ovld __cnfn convert_char2_rtn(half2); char2 __ovld __cnfn convert_char2_rtz(half2); char2 __ovld __cnfn convert_char2_sat(half2); char2 __ovld __cnfn convert_char2_sat_rte(half2); char2 __ovld __cnfn convert_char2_sat_rtp(half2); char2 __ovld __cnfn convert_char2_sat_rtn(half2); char2 __ovld __cnfn convert_char2_sat_rtz(half2); char3 __ovld __cnfn convert_char3(half3); char3 __ovld __cnfn convert_char3_rte(half3); char3 __ovld __cnfn convert_char3_rtp(half3); char3 __ovld __cnfn convert_char3_rtn(half3); char3 __ovld __cnfn convert_char3_rtz(half3); char3 __ovld __cnfn convert_char3_sat(half3); char3 __ovld __cnfn convert_char3_sat_rte(half3); char3 __ovld __cnfn convert_char3_sat_rtp(half3); char3 __ovld __cnfn convert_char3_sat_rtn(half3); char3 __ovld __cnfn convert_char3_sat_rtz(half3); char4 __ovld __cnfn convert_char4(half4); char4 __ovld __cnfn convert_char4_rte(half4); char4 __ovld __cnfn convert_char4_rtp(half4); char4 __ovld __cnfn convert_char4_rtn(half4); char4 __ovld __cnfn convert_char4_rtz(half4); char4 __ovld __cnfn convert_char4_sat(half4); char4 __ovld __cnfn convert_char4_sat_rte(half4); char4 __ovld __cnfn convert_char4_sat_rtp(half4); char4 __ovld __cnfn convert_char4_sat_rtn(half4); char4 __ovld __cnfn convert_char4_sat_rtz(half4); char8 __ovld __cnfn convert_char8(half8); char8 __ovld __cnfn convert_char8_rte(half8); char8 __ovld __cnfn convert_char8_rtp(half8); char8 __ovld __cnfn convert_char8_rtn(half8); char8 __ovld __cnfn convert_char8_rtz(half8); char8 __ovld __cnfn convert_char8_sat(half8); char8 __ovld 
__cnfn convert_char8_sat_rte(half8); char8 __ovld __cnfn convert_char8_sat_rtp(half8); char8 __ovld __cnfn convert_char8_sat_rtn(half8); char8 __ovld __cnfn convert_char8_sat_rtz(half8); char16 __ovld __cnfn convert_char16(half16); char16 __ovld __cnfn convert_char16_rte(half16); char16 __ovld __cnfn convert_char16_rtp(half16); char16 __ovld __cnfn convert_char16_rtn(half16); char16 __ovld __cnfn convert_char16_rtz(half16); char16 __ovld __cnfn convert_char16_sat(half16); char16 __ovld __cnfn convert_char16_sat_rte(half16); char16 __ovld __cnfn convert_char16_sat_rtp(half16); char16 __ovld __cnfn convert_char16_sat_rtn(half16); char16 __ovld __cnfn convert_char16_sat_rtz(half16); short __ovld __cnfn convert_short(half); short __ovld __cnfn convert_short_rte(half); short __ovld __cnfn convert_short_rtp(half); short __ovld __cnfn convert_short_rtn(half); short __ovld __cnfn convert_short_rtz(half); short __ovld __cnfn convert_short_sat(half); short __ovld __cnfn convert_short_sat_rte(half); short __ovld __cnfn convert_short_sat_rtp(half); short __ovld __cnfn convert_short_sat_rtn(half); short __ovld __cnfn convert_short_sat_rtz(half); short2 __ovld __cnfn convert_short2(half2); short2 __ovld __cnfn convert_short2_rte(half2); short2 __ovld __cnfn convert_short2_rtp(half2); short2 __ovld __cnfn convert_short2_rtn(half2); short2 __ovld __cnfn convert_short2_rtz(half2); short2 __ovld __cnfn convert_short2_sat(half2); short2 __ovld __cnfn convert_short2_sat_rte(half2); short2 __ovld __cnfn convert_short2_sat_rtp(half2); short2 __ovld __cnfn convert_short2_sat_rtn(half2); short2 __ovld __cnfn convert_short2_sat_rtz(half2); short3 __ovld __cnfn convert_short3(half3); short3 __ovld __cnfn convert_short3_rte(half3); short3 __ovld __cnfn convert_short3_rtp(half3); short3 __ovld __cnfn convert_short3_rtn(half3); short3 __ovld __cnfn convert_short3_rtz(half3); short3 __ovld __cnfn convert_short3_sat(half3); short3 __ovld __cnfn convert_short3_sat_rte(half3); short3 __ovld __cnfn 
convert_short3_sat_rtp(half3); short3 __ovld __cnfn convert_short3_sat_rtn(half3); short3 __ovld __cnfn convert_short3_sat_rtz(half3); short4 __ovld __cnfn convert_short4(half4); short4 __ovld __cnfn convert_short4_rte(half4); short4 __ovld __cnfn convert_short4_rtp(half4); short4 __ovld __cnfn convert_short4_rtn(half4); short4 __ovld __cnfn convert_short4_rtz(half4); short4 __ovld __cnfn convert_short4_sat(half4); short4 __ovld __cnfn convert_short4_sat_rte(half4); short4 __ovld __cnfn convert_short4_sat_rtp(half4); short4 __ovld __cnfn convert_short4_sat_rtn(half4); short4 __ovld __cnfn convert_short4_sat_rtz(half4); short8 __ovld __cnfn convert_short8(half8); short8 __ovld __cnfn convert_short8_rte(half8); short8 __ovld __cnfn convert_short8_rtp(half8); short8 __ovld __cnfn convert_short8_rtn(half8); short8 __ovld __cnfn convert_short8_rtz(half8); short8 __ovld __cnfn convert_short8_sat(half8); short8 __ovld __cnfn convert_short8_sat_rte(half8); short8 __ovld __cnfn convert_short8_sat_rtp(half8); short8 __ovld __cnfn convert_short8_sat_rtn(half8); short8 __ovld __cnfn convert_short8_sat_rtz(half8); short16 __ovld __cnfn convert_short16(half16); short16 __ovld __cnfn convert_short16_rte(half16); short16 __ovld __cnfn convert_short16_rtp(half16); short16 __ovld __cnfn convert_short16_rtn(half16); short16 __ovld __cnfn convert_short16_rtz(half16); short16 __ovld __cnfn convert_short16_sat(half16); short16 __ovld __cnfn convert_short16_sat_rte(half16); short16 __ovld __cnfn convert_short16_sat_rtp(half16); short16 __ovld __cnfn convert_short16_sat_rtn(half16); short16 __ovld __cnfn convert_short16_sat_rtz(half16); int __ovld __cnfn convert_int(half); int __ovld __cnfn convert_int_rte(half); int __ovld __cnfn convert_int_rtp(half); int __ovld __cnfn convert_int_rtn(half); int __ovld __cnfn convert_int_rtz(half); int __ovld __cnfn convert_int_sat(half); int __ovld __cnfn convert_int_sat_rte(half); int __ovld __cnfn convert_int_sat_rtp(half); int __ovld __cnfn 
convert_int_sat_rtn(half); int __ovld __cnfn convert_int_sat_rtz(half); int2 __ovld __cnfn convert_int2(half2); int2 __ovld __cnfn convert_int2_rte(half2); int2 __ovld __cnfn convert_int2_rtp(half2); int2 __ovld __cnfn convert_int2_rtn(half2); int2 __ovld __cnfn convert_int2_rtz(half2); int2 __ovld __cnfn convert_int2_sat(half2); int2 __ovld __cnfn convert_int2_sat_rte(half2); int2 __ovld __cnfn convert_int2_sat_rtp(half2); int2 __ovld __cnfn convert_int2_sat_rtn(half2); int2 __ovld __cnfn convert_int2_sat_rtz(half2); int3 __ovld __cnfn convert_int3(half3); int3 __ovld __cnfn convert_int3_rte(half3); int3 __ovld __cnfn convert_int3_rtp(half3); int3 __ovld __cnfn convert_int3_rtn(half3); int3 __ovld __cnfn convert_int3_rtz(half3); int3 __ovld __cnfn convert_int3_sat(half3); int3 __ovld __cnfn convert_int3_sat_rte(half3); int3 __ovld __cnfn convert_int3_sat_rtp(half3); int3 __ovld __cnfn convert_int3_sat_rtn(half3); int3 __ovld __cnfn convert_int3_sat_rtz(half3); int4 __ovld __cnfn convert_int4(half4); int4 __ovld __cnfn convert_int4_rte(half4); int4 __ovld __cnfn convert_int4_rtp(half4); int4 __ovld __cnfn convert_int4_rtn(half4); int4 __ovld __cnfn convert_int4_rtz(half4); int4 __ovld __cnfn convert_int4_sat(half4); int4 __ovld __cnfn convert_int4_sat_rte(half4); int4 __ovld __cnfn convert_int4_sat_rtp(half4); int4 __ovld __cnfn convert_int4_sat_rtn(half4); int4 __ovld __cnfn convert_int4_sat_rtz(half4); int8 __ovld __cnfn convert_int8(half8); int8 __ovld __cnfn convert_int8_rte(half8); int8 __ovld __cnfn convert_int8_rtp(half8); int8 __ovld __cnfn convert_int8_rtn(half8); int8 __ovld __cnfn convert_int8_rtz(half8); int8 __ovld __cnfn convert_int8_sat(half8); int8 __ovld __cnfn convert_int8_sat_rte(half8); int8 __ovld __cnfn convert_int8_sat_rtp(half8); int8 __ovld __cnfn convert_int8_sat_rtn(half8); int8 __ovld __cnfn convert_int8_sat_rtz(half8); int16 __ovld __cnfn convert_int16(half16); int16 __ovld __cnfn convert_int16_rte(half16); int16 __ovld __cnfn 
convert_int16_rtp(half16); int16 __ovld __cnfn convert_int16_rtn(half16); int16 __ovld __cnfn convert_int16_rtz(half16); int16 __ovld __cnfn convert_int16_sat(half16); int16 __ovld __cnfn convert_int16_sat_rte(half16); int16 __ovld __cnfn convert_int16_sat_rtp(half16); int16 __ovld __cnfn convert_int16_sat_rtn(half16); int16 __ovld __cnfn convert_int16_sat_rtz(half16); long __ovld __cnfn convert_long(half); long __ovld __cnfn convert_long_rte(half); long __ovld __cnfn convert_long_rtp(half); long __ovld __cnfn convert_long_rtn(half); long __ovld __cnfn convert_long_rtz(half); long __ovld __cnfn convert_long_sat(half); long __ovld __cnfn convert_long_sat_rte(half); long __ovld __cnfn convert_long_sat_rtp(half); long __ovld __cnfn convert_long_sat_rtn(half); long __ovld __cnfn convert_long_sat_rtz(half); long2 __ovld __cnfn convert_long2(half2); long2 __ovld __cnfn convert_long2_rte(half2); long2 __ovld __cnfn convert_long2_rtp(half2); long2 __ovld __cnfn convert_long2_rtn(half2); long2 __ovld __cnfn convert_long2_rtz(half2); long2 __ovld __cnfn convert_long2_sat(half2); long2 __ovld __cnfn convert_long2_sat_rte(half2); long2 __ovld __cnfn convert_long2_sat_rtp(half2); long2 __ovld __cnfn convert_long2_sat_rtn(half2); long2 __ovld __cnfn convert_long2_sat_rtz(half2); long3 __ovld __cnfn convert_long3(half3); long3 __ovld __cnfn convert_long3_rte(half3); long3 __ovld __cnfn convert_long3_rtp(half3); long3 __ovld __cnfn convert_long3_rtn(half3); long3 __ovld __cnfn convert_long3_rtz(half3); long3 __ovld __cnfn convert_long3_sat(half3); long3 __ovld __cnfn convert_long3_sat_rte(half3); long3 __ovld __cnfn convert_long3_sat_rtp(half3); long3 __ovld __cnfn convert_long3_sat_rtn(half3); long3 __ovld __cnfn convert_long3_sat_rtz(half3); long4 __ovld __cnfn convert_long4(half4); long4 __ovld __cnfn convert_long4_rte(half4); long4 __ovld __cnfn convert_long4_rtp(half4); long4 __ovld __cnfn convert_long4_rtn(half4); long4 __ovld __cnfn convert_long4_rtz(half4); long4 __ovld 
__cnfn convert_long4_sat(half4); long4 __ovld __cnfn convert_long4_sat_rte(half4); long4 __ovld __cnfn convert_long4_sat_rtp(half4); long4 __ovld __cnfn convert_long4_sat_rtn(half4); long4 __ovld __cnfn convert_long4_sat_rtz(half4); long8 __ovld __cnfn convert_long8(half8); long8 __ovld __cnfn convert_long8_rte(half8); long8 __ovld __cnfn convert_long8_rtp(half8); long8 __ovld __cnfn convert_long8_rtn(half8); long8 __ovld __cnfn convert_long8_rtz(half8); long8 __ovld __cnfn convert_long8_sat(half8); long8 __ovld __cnfn convert_long8_sat_rte(half8); long8 __ovld __cnfn convert_long8_sat_rtp(half8); long8 __ovld __cnfn convert_long8_sat_rtn(half8); long8 __ovld __cnfn convert_long8_sat_rtz(half8); long16 __ovld __cnfn convert_long16(half16); long16 __ovld __cnfn convert_long16_rte(half16); long16 __ovld __cnfn convert_long16_rtp(half16); long16 __ovld __cnfn convert_long16_rtn(half16); long16 __ovld __cnfn convert_long16_rtz(half16); long16 __ovld __cnfn convert_long16_sat(half16); long16 __ovld __cnfn convert_long16_sat_rte(half16); long16 __ovld __cnfn convert_long16_sat_rtp(half16); long16 __ovld __cnfn convert_long16_sat_rtn(half16); long16 __ovld __cnfn convert_long16_sat_rtz(half16); float __ovld __cnfn convert_float(half); float __ovld __cnfn convert_float_rte(half); float __ovld __cnfn convert_float_rtp(half); float __ovld __cnfn convert_float_rtn(half); float __ovld __cnfn convert_float_rtz(half); float2 __ovld __cnfn convert_float2(half2); float2 __ovld __cnfn convert_float2_rte(half2); float2 __ovld __cnfn convert_float2_rtp(half2); float2 __ovld __cnfn convert_float2_rtn(half2); float2 __ovld __cnfn convert_float2_rtz(half2); float3 __ovld __cnfn convert_float3(half3); float3 __ovld __cnfn convert_float3_rte(half3); float3 __ovld __cnfn convert_float3_rtp(half3); float3 __ovld __cnfn convert_float3_rtn(half3); float3 __ovld __cnfn convert_float3_rtz(half3); float4 __ovld __cnfn convert_float4(half4); float4 __ovld __cnfn convert_float4_rte(half4); float4 
__ovld __cnfn convert_float4_rtp(half4); float4 __ovld __cnfn convert_float4_rtn(half4); float4 __ovld __cnfn convert_float4_rtz(half4); float8 __ovld __cnfn convert_float8(half8); float8 __ovld __cnfn convert_float8_rte(half8); float8 __ovld __cnfn convert_float8_rtp(half8); float8 __ovld __cnfn convert_float8_rtn(half8); float8 __ovld __cnfn convert_float8_rtz(half8); float16 __ovld __cnfn convert_float16(half16); float16 __ovld __cnfn convert_float16_rte(half16); float16 __ovld __cnfn convert_float16_rtp(half16); float16 __ovld __cnfn convert_float16_rtn(half16); float16 __ovld __cnfn convert_float16_rtz(half16); // Convert non-double types to half types. half __ovld __cnfn convert_half(uchar); half __ovld __cnfn convert_half(ushort); half __ovld __cnfn convert_half(uint); half __ovld __cnfn convert_half(ulong); half __ovld __cnfn convert_half(char); half __ovld __cnfn convert_half(short); half __ovld __cnfn convert_half(int); half __ovld __cnfn convert_half(long); half __ovld __cnfn convert_half(float); half __ovld __cnfn convert_half(half); half __ovld __cnfn convert_half_rte(uchar); half __ovld __cnfn convert_half_rte(ushort); half __ovld __cnfn convert_half_rte(uint); half __ovld __cnfn convert_half_rte(ulong); half __ovld __cnfn convert_half_rte(char); half __ovld __cnfn convert_half_rte(short); half __ovld __cnfn convert_half_rte(int); half __ovld __cnfn convert_half_rte(long); half __ovld __cnfn convert_half_rte(float); half __ovld __cnfn convert_half_rte(half); half __ovld __cnfn convert_half_rtp(uchar); half __ovld __cnfn convert_half_rtp(ushort); half __ovld __cnfn convert_half_rtp(uint); half __ovld __cnfn convert_half_rtp(ulong); half __ovld __cnfn convert_half_rtp(char); half __ovld __cnfn convert_half_rtp(short); half __ovld __cnfn convert_half_rtp(int); half __ovld __cnfn convert_half_rtp(long); half __ovld __cnfn convert_half_rtp(float); half __ovld __cnfn convert_half_rtp(half); half __ovld __cnfn convert_half_rtn(uchar); half __ovld __cnfn 
convert_half_rtn(ushort); half __ovld __cnfn convert_half_rtn(uint); half __ovld __cnfn convert_half_rtn(ulong); half __ovld __cnfn convert_half_rtn(char); half __ovld __cnfn convert_half_rtn(short); half __ovld __cnfn convert_half_rtn(int); half __ovld __cnfn convert_half_rtn(long); half __ovld __cnfn convert_half_rtn(float); half __ovld __cnfn convert_half_rtn(half); half __ovld __cnfn convert_half_rtz(uchar); half __ovld __cnfn convert_half_rtz(ushort); half __ovld __cnfn convert_half_rtz(uint); half __ovld __cnfn convert_half_rtz(ulong); half __ovld __cnfn convert_half_rtz(char); half __ovld __cnfn convert_half_rtz(short); half __ovld __cnfn convert_half_rtz(int); half __ovld __cnfn convert_half_rtz(long); half __ovld __cnfn convert_half_rtz(float); half __ovld __cnfn convert_half_rtz(half); half2 __ovld __cnfn convert_half2(char2); half2 __ovld __cnfn convert_half2(uchar2); half2 __ovld __cnfn convert_half2(short2); half2 __ovld __cnfn convert_half2(ushort2); half2 __ovld __cnfn convert_half2(int2); half2 __ovld __cnfn convert_half2(uint2); half2 __ovld __cnfn convert_half2(long2); half2 __ovld __cnfn convert_half2(ulong2); half2 __ovld __cnfn convert_half2(float2); half2 __ovld __cnfn convert_half2(half2); half2 __ovld __cnfn convert_half2_rte(char2); half2 __ovld __cnfn convert_half2_rte(uchar2); half2 __ovld __cnfn convert_half2_rte(short2); half2 __ovld __cnfn convert_half2_rte(ushort2); half2 __ovld __cnfn convert_half2_rte(int2); half2 __ovld __cnfn convert_half2_rte(uint2); half2 __ovld __cnfn convert_half2_rte(long2); half2 __ovld __cnfn convert_half2_rte(ulong2); half2 __ovld __cnfn convert_half2_rte(float2); half2 __ovld __cnfn convert_half2_rte(half2); half2 __ovld __cnfn convert_half2_rtp(char2); half2 __ovld __cnfn convert_half2_rtp(uchar2); half2 __ovld __cnfn convert_half2_rtp(short2); half2 __ovld __cnfn convert_half2_rtp(ushort2); half2 __ovld __cnfn convert_half2_rtp(int2); half2 __ovld __cnfn convert_half2_rtp(uint2); half2 __ovld __cnfn 
convert_half2_rtp(long2); half2 __ovld __cnfn convert_half2_rtp(ulong2); half2 __ovld __cnfn convert_half2_rtp(float2); half2 __ovld __cnfn convert_half2_rtp(half2); half2 __ovld __cnfn convert_half2_rtn(char2); half2 __ovld __cnfn convert_half2_rtn(uchar2); half2 __ovld __cnfn convert_half2_rtn(short2); half2 __ovld __cnfn convert_half2_rtn(ushort2); half2 __ovld __cnfn convert_half2_rtn(int2); half2 __ovld __cnfn convert_half2_rtn(uint2); half2 __ovld __cnfn convert_half2_rtn(long2); half2 __ovld __cnfn convert_half2_rtn(ulong2); half2 __ovld __cnfn convert_half2_rtn(float2); half2 __ovld __cnfn convert_half2_rtn(half2); half2 __ovld __cnfn convert_half2_rtz(char2); half2 __ovld __cnfn convert_half2_rtz(uchar2); half2 __ovld __cnfn convert_half2_rtz(short2); half2 __ovld __cnfn convert_half2_rtz(ushort2); half2 __ovld __cnfn convert_half2_rtz(int2); half2 __ovld __cnfn convert_half2_rtz(uint2); half2 __ovld __cnfn convert_half2_rtz(long2); half2 __ovld __cnfn convert_half2_rtz(ulong2); half2 __ovld __cnfn convert_half2_rtz(float2); half2 __ovld __cnfn convert_half2_rtz(half2); half3 __ovld __cnfn convert_half3(char3); half3 __ovld __cnfn convert_half3(uchar3); half3 __ovld __cnfn convert_half3(short3); half3 __ovld __cnfn convert_half3(ushort3); half3 __ovld __cnfn convert_half3(int3); half3 __ovld __cnfn convert_half3(uint3); half3 __ovld __cnfn convert_half3(long3); half3 __ovld __cnfn convert_half3(ulong3); half3 __ovld __cnfn convert_half3(float3); half3 __ovld __cnfn convert_half3(half3); half3 __ovld __cnfn convert_half3_rte(char3); half3 __ovld __cnfn convert_half3_rte(uchar3); half3 __ovld __cnfn convert_half3_rte(short3); half3 __ovld __cnfn convert_half3_rte(ushort3); half3 __ovld __cnfn convert_half3_rte(int3); half3 __ovld __cnfn convert_half3_rte(uint3); half3 __ovld __cnfn convert_half3_rte(long3); half3 __ovld __cnfn convert_half3_rte(ulong3); half3 __ovld __cnfn convert_half3_rte(float3); half3 __ovld __cnfn convert_half3_rte(half3); half3 __ovld 
__cnfn convert_half3_rtp(char3); half3 __ovld __cnfn convert_half3_rtp(uchar3); half3 __ovld __cnfn convert_half3_rtp(short3); half3 __ovld __cnfn convert_half3_rtp(ushort3); half3 __ovld __cnfn convert_half3_rtp(int3); half3 __ovld __cnfn convert_half3_rtp(uint3); half3 __ovld __cnfn convert_half3_rtp(long3); half3 __ovld __cnfn convert_half3_rtp(ulong3); half3 __ovld __cnfn convert_half3_rtp(float3); half3 __ovld __cnfn convert_half3_rtp(half3); half3 __ovld __cnfn convert_half3_rtn(char3); half3 __ovld __cnfn convert_half3_rtn(uchar3); half3 __ovld __cnfn convert_half3_rtn(short3); half3 __ovld __cnfn convert_half3_rtn(ushort3); half3 __ovld __cnfn convert_half3_rtn(int3); half3 __ovld __cnfn convert_half3_rtn(uint3); half3 __ovld __cnfn convert_half3_rtn(long3); half3 __ovld __cnfn convert_half3_rtn(ulong3); half3 __ovld __cnfn convert_half3_rtn(float3); half3 __ovld __cnfn convert_half3_rtn(half3); half3 __ovld __cnfn convert_half3_rtz(char3); half3 __ovld __cnfn convert_half3_rtz(uchar3); half3 __ovld __cnfn convert_half3_rtz(short3); half3 __ovld __cnfn convert_half3_rtz(ushort3); half3 __ovld __cnfn convert_half3_rtz(int3); half3 __ovld __cnfn convert_half3_rtz(uint3); half3 __ovld __cnfn convert_half3_rtz(long3); half3 __ovld __cnfn convert_half3_rtz(ulong3); half3 __ovld __cnfn convert_half3_rtz(float3); half3 __ovld __cnfn convert_half3_rtz(half3); half4 __ovld __cnfn convert_half4(char4); half4 __ovld __cnfn convert_half4(uchar4); half4 __ovld __cnfn convert_half4(short4); half4 __ovld __cnfn convert_half4(ushort4); half4 __ovld __cnfn convert_half4(int4); half4 __ovld __cnfn convert_half4(uint4); half4 __ovld __cnfn convert_half4(long4); half4 __ovld __cnfn convert_half4(ulong4); half4 __ovld __cnfn convert_half4(float4); half4 __ovld __cnfn convert_half4(half4); half4 __ovld __cnfn convert_half4_rte(char4); half4 __ovld __cnfn convert_half4_rte(uchar4); half4 __ovld __cnfn convert_half4_rte(short4); half4 __ovld __cnfn convert_half4_rte(ushort4); 
half4 __ovld __cnfn convert_half4_rte(int4); half4 __ovld __cnfn convert_half4_rte(uint4); half4 __ovld __cnfn convert_half4_rte(long4); half4 __ovld __cnfn convert_half4_rte(ulong4); half4 __ovld __cnfn convert_half4_rte(float4); half4 __ovld __cnfn convert_half4_rte(half4); half4 __ovld __cnfn convert_half4_rtp(char4); half4 __ovld __cnfn convert_half4_rtp(uchar4); half4 __ovld __cnfn convert_half4_rtp(short4); half4 __ovld __cnfn convert_half4_rtp(ushort4); half4 __ovld __cnfn convert_half4_rtp(int4); half4 __ovld __cnfn convert_half4_rtp(uint4); half4 __ovld __cnfn convert_half4_rtp(long4); half4 __ovld __cnfn convert_half4_rtp(ulong4); half4 __ovld __cnfn convert_half4_rtp(float4); half4 __ovld __cnfn convert_half4_rtp(half4); half4 __ovld __cnfn convert_half4_rtn(char4); half4 __ovld __cnfn convert_half4_rtn(uchar4); half4 __ovld __cnfn convert_half4_rtn(short4); half4 __ovld __cnfn convert_half4_rtn(ushort4); half4 __ovld __cnfn convert_half4_rtn(int4); half4 __ovld __cnfn convert_half4_rtn(uint4); half4 __ovld __cnfn convert_half4_rtn(long4); half4 __ovld __cnfn convert_half4_rtn(ulong4); half4 __ovld __cnfn convert_half4_rtn(float4); half4 __ovld __cnfn convert_half4_rtn(half4); half4 __ovld __cnfn convert_half4_rtz(char4); half4 __ovld __cnfn convert_half4_rtz(uchar4); half4 __ovld __cnfn convert_half4_rtz(short4); half4 __ovld __cnfn convert_half4_rtz(ushort4); half4 __ovld __cnfn convert_half4_rtz(int4); half4 __ovld __cnfn convert_half4_rtz(uint4); half4 __ovld __cnfn convert_half4_rtz(long4); half4 __ovld __cnfn convert_half4_rtz(ulong4); half4 __ovld __cnfn convert_half4_rtz(float4); half4 __ovld __cnfn convert_half4_rtz(half4); half8 __ovld __cnfn convert_half8(char8); half8 __ovld __cnfn convert_half8(uchar8); half8 __ovld __cnfn convert_half8(short8); half8 __ovld __cnfn convert_half8(ushort8); half8 __ovld __cnfn convert_half8(int8); half8 __ovld __cnfn convert_half8(uint8); half8 __ovld __cnfn convert_half8(long8); half8 __ovld __cnfn 
convert_half8(ulong8); half8 __ovld __cnfn convert_half8(float8); half8 __ovld __cnfn convert_half8(half8); half8 __ovld __cnfn convert_half8_rte(char8); half8 __ovld __cnfn convert_half8_rte(uchar8); half8 __ovld __cnfn convert_half8_rte(short8); half8 __ovld __cnfn convert_half8_rte(ushort8); half8 __ovld __cnfn convert_half8_rte(int8); half8 __ovld __cnfn convert_half8_rte(uint8); half8 __ovld __cnfn convert_half8_rte(long8); half8 __ovld __cnfn convert_half8_rte(ulong8); half8 __ovld __cnfn convert_half8_rte(float8); half8 __ovld __cnfn convert_half8_rte(half8); half8 __ovld __cnfn convert_half8_rtp(char8); half8 __ovld __cnfn convert_half8_rtp(uchar8); half8 __ovld __cnfn convert_half8_rtp(short8); half8 __ovld __cnfn convert_half8_rtp(ushort8); half8 __ovld __cnfn convert_half8_rtp(int8); half8 __ovld __cnfn convert_half8_rtp(uint8); half8 __ovld __cnfn convert_half8_rtp(long8); half8 __ovld __cnfn convert_half8_rtp(ulong8); half8 __ovld __cnfn convert_half8_rtp(float8); half8 __ovld __cnfn convert_half8_rtp(half8); half8 __ovld __cnfn convert_half8_rtn(char8); half8 __ovld __cnfn convert_half8_rtn(uchar8); half8 __ovld __cnfn convert_half8_rtn(short8); half8 __ovld __cnfn convert_half8_rtn(ushort8); half8 __ovld __cnfn convert_half8_rtn(int8); half8 __ovld __cnfn convert_half8_rtn(uint8); half8 __ovld __cnfn convert_half8_rtn(long8); half8 __ovld __cnfn convert_half8_rtn(ulong8); half8 __ovld __cnfn convert_half8_rtn(float8); half8 __ovld __cnfn convert_half8_rtn(half8); half8 __ovld __cnfn convert_half8_rtz(char8); half8 __ovld __cnfn convert_half8_rtz(uchar8); half8 __ovld __cnfn convert_half8_rtz(short8); half8 __ovld __cnfn convert_half8_rtz(ushort8); half8 __ovld __cnfn convert_half8_rtz(int8); half8 __ovld __cnfn convert_half8_rtz(uint8); half8 __ovld __cnfn convert_half8_rtz(long8); half8 __ovld __cnfn convert_half8_rtz(ulong8); half8 __ovld __cnfn convert_half8_rtz(float8); half8 __ovld __cnfn convert_half8_rtz(half8); half16 __ovld __cnfn 
convert_half16(char16); half16 __ovld __cnfn convert_half16(uchar16); half16 __ovld __cnfn convert_half16(short16); half16 __ovld __cnfn convert_half16(ushort16); half16 __ovld __cnfn convert_half16(int16); half16 __ovld __cnfn convert_half16(uint16); half16 __ovld __cnfn convert_half16(long16); half16 __ovld __cnfn convert_half16(ulong16); half16 __ovld __cnfn convert_half16(float16); half16 __ovld __cnfn convert_half16(half16); half16 __ovld __cnfn convert_half16_rte(char16); half16 __ovld __cnfn convert_half16_rte(uchar16); half16 __ovld __cnfn convert_half16_rte(short16); half16 __ovld __cnfn convert_half16_rte(ushort16); half16 __ovld __cnfn convert_half16_rte(int16); half16 __ovld __cnfn convert_half16_rte(uint16); half16 __ovld __cnfn convert_half16_rte(long16); half16 __ovld __cnfn convert_half16_rte(ulong16); half16 __ovld __cnfn convert_half16_rte(float16); half16 __ovld __cnfn convert_half16_rte(half16); half16 __ovld __cnfn convert_half16_rtp(char16); half16 __ovld __cnfn convert_half16_rtp(uchar16); half16 __ovld __cnfn convert_half16_rtp(short16); half16 __ovld __cnfn convert_half16_rtp(ushort16); half16 __ovld __cnfn convert_half16_rtp(int16); half16 __ovld __cnfn convert_half16_rtp(uint16); half16 __ovld __cnfn convert_half16_rtp(long16); half16 __ovld __cnfn convert_half16_rtp(ulong16); half16 __ovld __cnfn convert_half16_rtp(float16); half16 __ovld __cnfn convert_half16_rtp(half16); half16 __ovld __cnfn convert_half16_rtn(char16); half16 __ovld __cnfn convert_half16_rtn(uchar16); half16 __ovld __cnfn convert_half16_rtn(short16); half16 __ovld __cnfn convert_half16_rtn(ushort16); half16 __ovld __cnfn convert_half16_rtn(int16); half16 __ovld __cnfn convert_half16_rtn(uint16); half16 __ovld __cnfn convert_half16_rtn(long16); half16 __ovld __cnfn convert_half16_rtn(ulong16); half16 __ovld __cnfn convert_half16_rtn(float16); half16 __ovld __cnfn convert_half16_rtn(half16); half16 __ovld __cnfn convert_half16_rtz(char16); half16 __ovld __cnfn 
convert_half16_rtz(uchar16); half16 __ovld __cnfn convert_half16_rtz(short16); half16 __ovld __cnfn convert_half16_rtz(ushort16); half16 __ovld __cnfn convert_half16_rtz(int16); half16 __ovld __cnfn convert_half16_rtz(uint16); half16 __ovld __cnfn convert_half16_rtz(long16); half16 __ovld __cnfn convert_half16_rtz(ulong16); half16 __ovld __cnfn convert_half16_rtz(float16); half16 __ovld __cnfn convert_half16_rtz(half16); // Convert half types to double types. #ifdef cl_khr_fp64 double __ovld __cnfn convert_double(half); double __ovld __cnfn convert_double_rte(half); double __ovld __cnfn convert_double_rtp(half); double __ovld __cnfn convert_double_rtn(half); double __ovld __cnfn convert_double_rtz(half); double2 __ovld __cnfn convert_double2(half2); double2 __ovld __cnfn convert_double2_rte(half2); double2 __ovld __cnfn convert_double2_rtp(half2); double2 __ovld __cnfn convert_double2_rtn(half2); double2 __ovld __cnfn convert_double2_rtz(half2); double3 __ovld __cnfn convert_double3(half3); double3 __ovld __cnfn convert_double3_rte(half3); double3 __ovld __cnfn convert_double3_rtp(half3); double3 __ovld __cnfn convert_double3_rtn(half3); double3 __ovld __cnfn convert_double3_rtz(half3); double4 __ovld __cnfn convert_double4(half4); double4 __ovld __cnfn convert_double4_rte(half4); double4 __ovld __cnfn convert_double4_rtp(half4); double4 __ovld __cnfn convert_double4_rtn(half4); double4 __ovld __cnfn convert_double4_rtz(half4); double8 __ovld __cnfn convert_double8(half8); double8 __ovld __cnfn convert_double8_rte(half8); double8 __ovld __cnfn convert_double8_rtp(half8); double8 __ovld __cnfn convert_double8_rtn(half8); double8 __ovld __cnfn convert_double8_rtz(half8); double16 __ovld __cnfn convert_double16(half16); double16 __ovld __cnfn convert_double16_rte(half16); double16 __ovld __cnfn convert_double16_rtp(half16); double16 __ovld __cnfn convert_double16_rtn(half16); double16 __ovld __cnfn convert_double16_rtz(half16); // Convert double types to half types. 
half __ovld __cnfn convert_half(double); half __ovld __cnfn convert_half_rte(double); half __ovld __cnfn convert_half_rtp(double); half __ovld __cnfn convert_half_rtn(double); half __ovld __cnfn convert_half_rtz(double); half2 __ovld __cnfn convert_half2(double2); half2 __ovld __cnfn convert_half2_rte(double2); half2 __ovld __cnfn convert_half2_rtp(double2); half2 __ovld __cnfn convert_half2_rtn(double2); half2 __ovld __cnfn convert_half2_rtz(double2); half3 __ovld __cnfn convert_half3(double3); half3 __ovld __cnfn convert_half3_rte(double3); half3 __ovld __cnfn convert_half3_rtp(double3); half3 __ovld __cnfn convert_half3_rtn(double3); half3 __ovld __cnfn convert_half3_rtz(double3); half4 __ovld __cnfn convert_half4(double4); half4 __ovld __cnfn convert_half4_rte(double4); half4 __ovld __cnfn convert_half4_rtp(double4); half4 __ovld __cnfn convert_half4_rtn(double4); half4 __ovld __cnfn convert_half4_rtz(double4); half8 __ovld __cnfn convert_half8(double8); half8 __ovld __cnfn convert_half8_rte(double8); half8 __ovld __cnfn convert_half8_rtp(double8); half8 __ovld __cnfn convert_half8_rtn(double8); half8 __ovld __cnfn convert_half8_rtz(double8); half16 __ovld __cnfn convert_half16(double16); half16 __ovld __cnfn convert_half16_rte(double16); half16 __ovld __cnfn convert_half16_rtp(double16); half16 __ovld __cnfn convert_half16_rtn(double16); half16 __ovld __cnfn convert_half16_rtz(double16); #endif //cl_khr_fp64 #endif // cl_khr_fp16 // OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions /** * Returns the number of dimensions in use. This is the * value given to the work_dim argument specified in * clEnqueueNDRangeKernel. * For clEnqueueTask, this returns 1. */ uint __ovld __cnfn get_work_dim(void); /** * Returns the number of global work-items specified for * dimension identified by dimindx. This value is given by * the global_work_size argument to * clEnqueueNDRangeKernel. Valid values of dimindx * are 0 to get_work_dim() - 1. 
For other values of * dimindx, get_global_size() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_global_size(uint); /** * Returns the unique global work-item ID value for * dimension identified by dimindx. The global work-item * ID specifies the work-item ID based on the number of * global work-items specified to execute the kernel. Valid * values of dimindx are 0 to get_work_dim() - 1. For * other values of dimindx, get_global_id() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_global_id(uint); /** * Returns the number of local work-items specified in * dimension identified by dimindx. This value is given by * the local_work_size argument to * clEnqueueNDRangeKernel if local_work_size is not * NULL; otherwise the OpenCL implementation chooses * an appropriate local_work_size value which is returned * by this function. Valid values of dimindx are 0 to * get_work_dim() - 1. For other values of dimindx, * get_local_size() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_local_size(uint); /** * Returns the unique local work-item ID i.e. a work-item * within a specific work-group for dimension identified by * dimindx. Valid values of dimindx are 0 to * get_work_dim() - 1. For other values of dimindx, * get_local_id() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_local_id(uint); /** * Returns the number of work-groups that will execute a * kernel for dimension identified by dimindx. * Valid values of dimindx are 0 to get_work_dim() - 1. * For other values of dimindx, get_num_groups() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_num_groups(uint); /** * get_group_id returns the work-group ID which is a * number from 0 .. get_num_groups(dimindx) - 1. * Valid values of dimindx are 0 to get_work_dim() - 1. * For other values, get_group_id() returns 0. * For clEnqueueTask, this returns 0. 
*/ size_t __ovld __cnfn get_group_id(uint); /** * get_global_offset returns the offset values specified in * global_work_offset argument to * clEnqueueNDRangeKernel. * Valid values of dimindx are 0 to get_work_dim() - 1. * For other values, get_global_offset() returns 0. * For clEnqueueTask, this returns 0. */ size_t __ovld __cnfn get_global_offset(uint); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) size_t __ovld get_enqueued_local_size(uint); size_t __ovld get_global_linear_id(void); size_t __ovld get_local_linear_id(void); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions /** * Arc cosine function. */ float __ovld __cnfn acos(float); float2 __ovld __cnfn acos(float2); float3 __ovld __cnfn acos(float3); float4 __ovld __cnfn acos(float4); float8 __ovld __cnfn acos(float8); float16 __ovld __cnfn acos(float16); #ifdef cl_khr_fp64 double __ovld __cnfn acos(double); double2 __ovld __cnfn acos(double2); double3 __ovld __cnfn acos(double3); double4 __ovld __cnfn acos(double4); double8 __ovld __cnfn acos(double8); double16 __ovld __cnfn acos(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn acos(half); half2 __ovld __cnfn acos(half2); half3 __ovld __cnfn acos(half3); half4 __ovld __cnfn acos(half4); half8 __ovld __cnfn acos(half8); half16 __ovld __cnfn acos(half16); #endif //cl_khr_fp16 /** * Inverse hyperbolic cosine. 
*/ float __ovld __cnfn acosh(float); float2 __ovld __cnfn acosh(float2); float3 __ovld __cnfn acosh(float3); float4 __ovld __cnfn acosh(float4); float8 __ovld __cnfn acosh(float8); float16 __ovld __cnfn acosh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn acosh(double); double2 __ovld __cnfn acosh(double2); double3 __ovld __cnfn acosh(double3); double4 __ovld __cnfn acosh(double4); double8 __ovld __cnfn acosh(double8); double16 __ovld __cnfn acosh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn acosh(half); half2 __ovld __cnfn acosh(half2); half3 __ovld __cnfn acosh(half3); half4 __ovld __cnfn acosh(half4); half8 __ovld __cnfn acosh(half8); half16 __ovld __cnfn acosh(half16); #endif //cl_khr_fp16 /** * Compute acos (x) / PI. */ float __ovld __cnfn acospi(float); float2 __ovld __cnfn acospi(float2); float3 __ovld __cnfn acospi(float3); float4 __ovld __cnfn acospi(float4); float8 __ovld __cnfn acospi(float8); float16 __ovld __cnfn acospi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn acospi(double); double2 __ovld __cnfn acospi(double2); double3 __ovld __cnfn acospi(double3); double4 __ovld __cnfn acospi(double4); double8 __ovld __cnfn acospi(double8); double16 __ovld __cnfn acospi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn acospi(half); half2 __ovld __cnfn acospi(half2); half3 __ovld __cnfn acospi(half3); half4 __ovld __cnfn acospi(half4); half8 __ovld __cnfn acospi(half8); half16 __ovld __cnfn acospi(half16); #endif //cl_khr_fp16 /** * Arc sine function. 
*/ float __ovld __cnfn asin(float); float2 __ovld __cnfn asin(float2); float3 __ovld __cnfn asin(float3); float4 __ovld __cnfn asin(float4); float8 __ovld __cnfn asin(float8); float16 __ovld __cnfn asin(float16); #ifdef cl_khr_fp64 double __ovld __cnfn asin(double); double2 __ovld __cnfn asin(double2); double3 __ovld __cnfn asin(double3); double4 __ovld __cnfn asin(double4); double8 __ovld __cnfn asin(double8); double16 __ovld __cnfn asin(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn asin(half); half2 __ovld __cnfn asin(half2); half3 __ovld __cnfn asin(half3); half4 __ovld __cnfn asin(half4); half8 __ovld __cnfn asin(half8); half16 __ovld __cnfn asin(half16); #endif //cl_khr_fp16 /** * Inverse hyperbolic sine. */ float __ovld __cnfn asinh(float); float2 __ovld __cnfn asinh(float2); float3 __ovld __cnfn asinh(float3); float4 __ovld __cnfn asinh(float4); float8 __ovld __cnfn asinh(float8); float16 __ovld __cnfn asinh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn asinh(double); double2 __ovld __cnfn asinh(double2); double3 __ovld __cnfn asinh(double3); double4 __ovld __cnfn asinh(double4); double8 __ovld __cnfn asinh(double8); double16 __ovld __cnfn asinh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn asinh(half); half2 __ovld __cnfn asinh(half2); half3 __ovld __cnfn asinh(half3); half4 __ovld __cnfn asinh(half4); half8 __ovld __cnfn asinh(half8); half16 __ovld __cnfn asinh(half16); #endif //cl_khr_fp16 /** * Compute asin (x) / PI. 
*/ float __ovld __cnfn asinpi(float); float2 __ovld __cnfn asinpi(float2); float3 __ovld __cnfn asinpi(float3); float4 __ovld __cnfn asinpi(float4); float8 __ovld __cnfn asinpi(float8); float16 __ovld __cnfn asinpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn asinpi(double); double2 __ovld __cnfn asinpi(double2); double3 __ovld __cnfn asinpi(double3); double4 __ovld __cnfn asinpi(double4); double8 __ovld __cnfn asinpi(double8); double16 __ovld __cnfn asinpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn asinpi(half); half2 __ovld __cnfn asinpi(half2); half3 __ovld __cnfn asinpi(half3); half4 __ovld __cnfn asinpi(half4); half8 __ovld __cnfn asinpi(half8); half16 __ovld __cnfn asinpi(half16); #endif //cl_khr_fp16 /** * Arc tangent function. */ float __ovld __cnfn atan(float); float2 __ovld __cnfn atan(float2); float3 __ovld __cnfn atan(float3); float4 __ovld __cnfn atan(float4); float8 __ovld __cnfn atan(float8); float16 __ovld __cnfn atan(float16); #ifdef cl_khr_fp64 double __ovld __cnfn atan(double); double2 __ovld __cnfn atan(double2); double3 __ovld __cnfn atan(double3); double4 __ovld __cnfn atan(double4); double8 __ovld __cnfn atan(double8); double16 __ovld __cnfn atan(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atan(half); half2 __ovld __cnfn atan(half2); half3 __ovld __cnfn atan(half3); half4 __ovld __cnfn atan(half4); half8 __ovld __cnfn atan(half8); half16 __ovld __cnfn atan(half16); #endif //cl_khr_fp16 /** * Arc tangent of y / x. 
*/ float __ovld __cnfn atan2(float, float); float2 __ovld __cnfn atan2(float2, float2); float3 __ovld __cnfn atan2(float3, float3); float4 __ovld __cnfn atan2(float4, float4); float8 __ovld __cnfn atan2(float8, float8); float16 __ovld __cnfn atan2(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn atan2(double, double); double2 __ovld __cnfn atan2(double2, double2); double3 __ovld __cnfn atan2(double3, double3); double4 __ovld __cnfn atan2(double4, double4); double8 __ovld __cnfn atan2(double8, double8); double16 __ovld __cnfn atan2(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atan2(half, half); half2 __ovld __cnfn atan2(half2, half2); half3 __ovld __cnfn atan2(half3, half3); half4 __ovld __cnfn atan2(half4, half4); half8 __ovld __cnfn atan2(half8, half8); half16 __ovld __cnfn atan2(half16, half16); #endif //cl_khr_fp16 /** * Hyperbolic arc tangent. */ float __ovld __cnfn atanh(float); float2 __ovld __cnfn atanh(float2); float3 __ovld __cnfn atanh(float3); float4 __ovld __cnfn atanh(float4); float8 __ovld __cnfn atanh(float8); float16 __ovld __cnfn atanh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn atanh(double); double2 __ovld __cnfn atanh(double2); double3 __ovld __cnfn atanh(double3); double4 __ovld __cnfn atanh(double4); double8 __ovld __cnfn atanh(double8); double16 __ovld __cnfn atanh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atanh(half); half2 __ovld __cnfn atanh(half2); half3 __ovld __cnfn atanh(half3); half4 __ovld __cnfn atanh(half4); half8 __ovld __cnfn atanh(half8); half16 __ovld __cnfn atanh(half16); #endif //cl_khr_fp16 /** * Compute atan (x) / PI. 
*/ float __ovld __cnfn atanpi(float); float2 __ovld __cnfn atanpi(float2); float3 __ovld __cnfn atanpi(float3); float4 __ovld __cnfn atanpi(float4); float8 __ovld __cnfn atanpi(float8); float16 __ovld __cnfn atanpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn atanpi(double); double2 __ovld __cnfn atanpi(double2); double3 __ovld __cnfn atanpi(double3); double4 __ovld __cnfn atanpi(double4); double8 __ovld __cnfn atanpi(double8); double16 __ovld __cnfn atanpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atanpi(half); half2 __ovld __cnfn atanpi(half2); half3 __ovld __cnfn atanpi(half3); half4 __ovld __cnfn atanpi(half4); half8 __ovld __cnfn atanpi(half8); half16 __ovld __cnfn atanpi(half16); #endif //cl_khr_fp16 /** * Compute atan2 (y, x) / PI. */ float __ovld __cnfn atan2pi(float, float); float2 __ovld __cnfn atan2pi(float2, float2); float3 __ovld __cnfn atan2pi(float3, float3); float4 __ovld __cnfn atan2pi(float4, float4); float8 __ovld __cnfn atan2pi(float8, float8); float16 __ovld __cnfn atan2pi(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn atan2pi(double, double); double2 __ovld __cnfn atan2pi(double2, double2); double3 __ovld __cnfn atan2pi(double3, double3); double4 __ovld __cnfn atan2pi(double4, double4); double8 __ovld __cnfn atan2pi(double8, double8); double16 __ovld __cnfn atan2pi(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn atan2pi(half, half); half2 __ovld __cnfn atan2pi(half2, half2); half3 __ovld __cnfn atan2pi(half3, half3); half4 __ovld __cnfn atan2pi(half4, half4); half8 __ovld __cnfn atan2pi(half8, half8); half16 __ovld __cnfn atan2pi(half16, half16); #endif //cl_khr_fp16 /** * Compute cube-root. 
*/ float __ovld __cnfn cbrt(float); float2 __ovld __cnfn cbrt(float2); float3 __ovld __cnfn cbrt(float3); float4 __ovld __cnfn cbrt(float4); float8 __ovld __cnfn cbrt(float8); float16 __ovld __cnfn cbrt(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cbrt(double); double2 __ovld __cnfn cbrt(double2); double3 __ovld __cnfn cbrt(double3); double4 __ovld __cnfn cbrt(double4); double8 __ovld __cnfn cbrt(double8); double16 __ovld __cnfn cbrt(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cbrt(half); half2 __ovld __cnfn cbrt(half2); half3 __ovld __cnfn cbrt(half3); half4 __ovld __cnfn cbrt(half4); half8 __ovld __cnfn cbrt(half8); half16 __ovld __cnfn cbrt(half16); #endif //cl_khr_fp16 /** * Round to integral value using the round to positive * infinity rounding mode. */ float __ovld __cnfn ceil(float); float2 __ovld __cnfn ceil(float2); float3 __ovld __cnfn ceil(float3); float4 __ovld __cnfn ceil(float4); float8 __ovld __cnfn ceil(float8); float16 __ovld __cnfn ceil(float16); #ifdef cl_khr_fp64 double __ovld __cnfn ceil(double); double2 __ovld __cnfn ceil(double2); double3 __ovld __cnfn ceil(double3); double4 __ovld __cnfn ceil(double4); double8 __ovld __cnfn ceil(double8); double16 __ovld __cnfn ceil(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn ceil(half); half2 __ovld __cnfn ceil(half2); half3 __ovld __cnfn ceil(half3); half4 __ovld __cnfn ceil(half4); half8 __ovld __cnfn ceil(half8); half16 __ovld __cnfn ceil(half16); #endif //cl_khr_fp16 /** * Returns x with its sign changed to match the sign of y. 
*/ float __ovld __cnfn copysign(float, float); float2 __ovld __cnfn copysign(float2, float2); float3 __ovld __cnfn copysign(float3, float3); float4 __ovld __cnfn copysign(float4, float4); float8 __ovld __cnfn copysign(float8, float8); float16 __ovld __cnfn copysign(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn copysign(double, double); double2 __ovld __cnfn copysign(double2, double2); double3 __ovld __cnfn copysign(double3, double3); double4 __ovld __cnfn copysign(double4, double4); double8 __ovld __cnfn copysign(double8, double8); double16 __ovld __cnfn copysign(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn copysign(half, half); half2 __ovld __cnfn copysign(half2, half2); half3 __ovld __cnfn copysign(half3, half3); half4 __ovld __cnfn copysign(half4, half4); half8 __ovld __cnfn copysign(half8, half8); half16 __ovld __cnfn copysign(half16, half16); #endif //cl_khr_fp16 /** * Compute cosine. */ float __ovld __cnfn cos(float); float2 __ovld __cnfn cos(float2); float3 __ovld __cnfn cos(float3); float4 __ovld __cnfn cos(float4); float8 __ovld __cnfn cos(float8); float16 __ovld __cnfn cos(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cos(double); double2 __ovld __cnfn cos(double2); double3 __ovld __cnfn cos(double3); double4 __ovld __cnfn cos(double4); double8 __ovld __cnfn cos(double8); double16 __ovld __cnfn cos(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cos(half); half2 __ovld __cnfn cos(half2); half3 __ovld __cnfn cos(half3); half4 __ovld __cnfn cos(half4); half8 __ovld __cnfn cos(half8); half16 __ovld __cnfn cos(half16); #endif //cl_khr_fp16 /** * Compute hyperbolic cosine. 
*/ float __ovld __cnfn cosh(float); float2 __ovld __cnfn cosh(float2); float3 __ovld __cnfn cosh(float3); float4 __ovld __cnfn cosh(float4); float8 __ovld __cnfn cosh(float8); float16 __ovld __cnfn cosh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cosh(double); double2 __ovld __cnfn cosh(double2); double3 __ovld __cnfn cosh(double3); double4 __ovld __cnfn cosh(double4); double8 __ovld __cnfn cosh(double8); double16 __ovld __cnfn cosh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cosh(half); half2 __ovld __cnfn cosh(half2); half3 __ovld __cnfn cosh(half3); half4 __ovld __cnfn cosh(half4); half8 __ovld __cnfn cosh(half8); half16 __ovld __cnfn cosh(half16); #endif //cl_khr_fp16 /** * Compute cos (PI * x). */ float __ovld __cnfn cospi(float); float2 __ovld __cnfn cospi(float2); float3 __ovld __cnfn cospi(float3); float4 __ovld __cnfn cospi(float4); float8 __ovld __cnfn cospi(float8); float16 __ovld __cnfn cospi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn cospi(double); double2 __ovld __cnfn cospi(double2); double3 __ovld __cnfn cospi(double3); double4 __ovld __cnfn cospi(double4); double8 __ovld __cnfn cospi(double8); double16 __ovld __cnfn cospi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn cospi(half); half2 __ovld __cnfn cospi(half2); half3 __ovld __cnfn cospi(half3); half4 __ovld __cnfn cospi(half4); half8 __ovld __cnfn cospi(half8); half16 __ovld __cnfn cospi(half16); #endif //cl_khr_fp16 /** * Complementary error function. 
*/ float __ovld __cnfn erfc(float); float2 __ovld __cnfn erfc(float2); float3 __ovld __cnfn erfc(float3); float4 __ovld __cnfn erfc(float4); float8 __ovld __cnfn erfc(float8); float16 __ovld __cnfn erfc(float16); #ifdef cl_khr_fp64 double __ovld __cnfn erfc(double); double2 __ovld __cnfn erfc(double2); double3 __ovld __cnfn erfc(double3); double4 __ovld __cnfn erfc(double4); double8 __ovld __cnfn erfc(double8); double16 __ovld __cnfn erfc(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn erfc(half); half2 __ovld __cnfn erfc(half2); half3 __ovld __cnfn erfc(half3); half4 __ovld __cnfn erfc(half4); half8 __ovld __cnfn erfc(half8); half16 __ovld __cnfn erfc(half16); #endif //cl_khr_fp16 /** * Error function encountered in integrating the * normal distribution. */ float __ovld __cnfn erf(float); float2 __ovld __cnfn erf(float2); float3 __ovld __cnfn erf(float3); float4 __ovld __cnfn erf(float4); float8 __ovld __cnfn erf(float8); float16 __ovld __cnfn erf(float16); #ifdef cl_khr_fp64 double __ovld __cnfn erf(double); double2 __ovld __cnfn erf(double2); double3 __ovld __cnfn erf(double3); double4 __ovld __cnfn erf(double4); double8 __ovld __cnfn erf(double8); double16 __ovld __cnfn erf(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn erf(half); half2 __ovld __cnfn erf(half2); half3 __ovld __cnfn erf(half3); half4 __ovld __cnfn erf(half4); half8 __ovld __cnfn erf(half8); half16 __ovld __cnfn erf(half16); #endif //cl_khr_fp16 /** * Compute the base e exponential function of x. 
*/ float __ovld __cnfn exp(float); float2 __ovld __cnfn exp(float2); float3 __ovld __cnfn exp(float3); float4 __ovld __cnfn exp(float4); float8 __ovld __cnfn exp(float8); float16 __ovld __cnfn exp(float16); #ifdef cl_khr_fp64 double __ovld __cnfn exp(double); double2 __ovld __cnfn exp(double2); double3 __ovld __cnfn exp(double3); double4 __ovld __cnfn exp(double4); double8 __ovld __cnfn exp(double8); double16 __ovld __cnfn exp(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn exp(half); half2 __ovld __cnfn exp(half2); half3 __ovld __cnfn exp(half3); half4 __ovld __cnfn exp(half4); half8 __ovld __cnfn exp(half8); half16 __ovld __cnfn exp(half16); #endif //cl_khr_fp16 /** * Exponential base 2 function. */ float __ovld __cnfn exp2(float); float2 __ovld __cnfn exp2(float2); float3 __ovld __cnfn exp2(float3); float4 __ovld __cnfn exp2(float4); float8 __ovld __cnfn exp2(float8); float16 __ovld __cnfn exp2(float16); #ifdef cl_khr_fp64 double __ovld __cnfn exp2(double); double2 __ovld __cnfn exp2(double2); double3 __ovld __cnfn exp2(double3); double4 __ovld __cnfn exp2(double4); double8 __ovld __cnfn exp2(double8); double16 __ovld __cnfn exp2(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn exp2(half); half2 __ovld __cnfn exp2(half2); half3 __ovld __cnfn exp2(half3); half4 __ovld __cnfn exp2(half4); half8 __ovld __cnfn exp2(half8); half16 __ovld __cnfn exp2(half16); #endif //cl_khr_fp16 /** * Exponential base 10 function. 
*/ float __ovld __cnfn exp10(float); float2 __ovld __cnfn exp10(float2); float3 __ovld __cnfn exp10(float3); float4 __ovld __cnfn exp10(float4); float8 __ovld __cnfn exp10(float8); float16 __ovld __cnfn exp10(float16); #ifdef cl_khr_fp64 double __ovld __cnfn exp10(double); double2 __ovld __cnfn exp10(double2); double3 __ovld __cnfn exp10(double3); double4 __ovld __cnfn exp10(double4); double8 __ovld __cnfn exp10(double8); double16 __ovld __cnfn exp10(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn exp10(half); half2 __ovld __cnfn exp10(half2); half3 __ovld __cnfn exp10(half3); half4 __ovld __cnfn exp10(half4); half8 __ovld __cnfn exp10(half8); half16 __ovld __cnfn exp10(half16); #endif //cl_khr_fp16 /** * Compute e^x- 1.0. */ float __ovld __cnfn expm1(float); float2 __ovld __cnfn expm1(float2); float3 __ovld __cnfn expm1(float3); float4 __ovld __cnfn expm1(float4); float8 __ovld __cnfn expm1(float8); float16 __ovld __cnfn expm1(float16); #ifdef cl_khr_fp64 double __ovld __cnfn expm1(double); double2 __ovld __cnfn expm1(double2); double3 __ovld __cnfn expm1(double3); double4 __ovld __cnfn expm1(double4); double8 __ovld __cnfn expm1(double8); double16 __ovld __cnfn expm1(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn expm1(half); half2 __ovld __cnfn expm1(half2); half3 __ovld __cnfn expm1(half3); half4 __ovld __cnfn expm1(half4); half8 __ovld __cnfn expm1(half8); half16 __ovld __cnfn expm1(half16); #endif //cl_khr_fp16 /** * Compute absolute value of a floating-point number. 
*/ float __ovld __cnfn fabs(float); float2 __ovld __cnfn fabs(float2); float3 __ovld __cnfn fabs(float3); float4 __ovld __cnfn fabs(float4); float8 __ovld __cnfn fabs(float8); float16 __ovld __cnfn fabs(float16); #ifdef cl_khr_fp64 double __ovld __cnfn fabs(double); double2 __ovld __cnfn fabs(double2); double3 __ovld __cnfn fabs(double3); double4 __ovld __cnfn fabs(double4); double8 __ovld __cnfn fabs(double8); double16 __ovld __cnfn fabs(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fabs(half); half2 __ovld __cnfn fabs(half2); half3 __ovld __cnfn fabs(half3); half4 __ovld __cnfn fabs(half4); half8 __ovld __cnfn fabs(half8); half16 __ovld __cnfn fabs(half16); #endif //cl_khr_fp16 /** * x - y if x > y, +0 if x is less than or equal to y. */ float __ovld __cnfn fdim(float, float); float2 __ovld __cnfn fdim(float2, float2); float3 __ovld __cnfn fdim(float3, float3); float4 __ovld __cnfn fdim(float4, float4); float8 __ovld __cnfn fdim(float8, float8); float16 __ovld __cnfn fdim(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn fdim(double, double); double2 __ovld __cnfn fdim(double2, double2); double3 __ovld __cnfn fdim(double3, double3); double4 __ovld __cnfn fdim(double4, double4); double8 __ovld __cnfn fdim(double8, double8); double16 __ovld __cnfn fdim(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fdim(half, half); half2 __ovld __cnfn fdim(half2, half2); half3 __ovld __cnfn fdim(half3, half3); half4 __ovld __cnfn fdim(half4, half4); half8 __ovld __cnfn fdim(half8, half8); half16 __ovld __cnfn fdim(half16, half16); #endif //cl_khr_fp16 /** * Round to integral value using the round to -ve * infinity rounding mode. 
*/ float __ovld __cnfn floor(float); float2 __ovld __cnfn floor(float2); float3 __ovld __cnfn floor(float3); float4 __ovld __cnfn floor(float4); float8 __ovld __cnfn floor(float8); float16 __ovld __cnfn floor(float16); #ifdef cl_khr_fp64 double __ovld __cnfn floor(double); double2 __ovld __cnfn floor(double2); double3 __ovld __cnfn floor(double3); double4 __ovld __cnfn floor(double4); double8 __ovld __cnfn floor(double8); double16 __ovld __cnfn floor(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn floor(half); half2 __ovld __cnfn floor(half2); half3 __ovld __cnfn floor(half3); half4 __ovld __cnfn floor(half4); half8 __ovld __cnfn floor(half8); half16 __ovld __cnfn floor(half16); #endif //cl_khr_fp16 /** * Returns the correctly rounded floating-point * representation of the sum of c with the infinitely * precise product of a and b. Rounding of * intermediate products shall not occur. Edge case * behavior is per the IEEE 754-2008 standard. */ float __ovld __cnfn fma(float, float, float); float2 __ovld __cnfn fma(float2, float2, float2); float3 __ovld __cnfn fma(float3, float3, float3); float4 __ovld __cnfn fma(float4, float4, float4); float8 __ovld __cnfn fma(float8, float8, float8); float16 __ovld __cnfn fma(float16, float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn fma(double, double, double); double2 __ovld __cnfn fma(double2, double2, double2); double3 __ovld __cnfn fma(double3, double3, double3); double4 __ovld __cnfn fma(double4, double4, double4); double8 __ovld __cnfn fma(double8, double8, double8); double16 __ovld __cnfn fma(double16, double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fma(half, half, half); half2 __ovld __cnfn fma(half2, half2, half2); half3 __ovld __cnfn fma(half3, half3, half3); half4 __ovld __cnfn fma(half4, half4, half4); half8 __ovld __cnfn fma(half8, half8, half8); half16 __ovld __cnfn fma(half16, half16, half16); #endif //cl_khr_fp16 /** * Returns y if x < y, otherwise it 
returns x. If one * argument is a NaN, fmax() returns the other * argument. If both arguments are NaNs, fmax() * returns a NaN. */ float __ovld __cnfn fmax(float, float); float2 __ovld __cnfn fmax(float2, float2); float3 __ovld __cnfn fmax(float3, float3); float4 __ovld __cnfn fmax(float4, float4); float8 __ovld __cnfn fmax(float8, float8); float16 __ovld __cnfn fmax(float16, float16); float2 __ovld __cnfn fmax(float2, float); float3 __ovld __cnfn fmax(float3, float); float4 __ovld __cnfn fmax(float4, float); float8 __ovld __cnfn fmax(float8, float); float16 __ovld __cnfn fmax(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn fmax(double, double); double2 __ovld __cnfn fmax(double2, double2); double3 __ovld __cnfn fmax(double3, double3); double4 __ovld __cnfn fmax(double4, double4); double8 __ovld __cnfn fmax(double8, double8); double16 __ovld __cnfn fmax(double16, double16); double2 __ovld __cnfn fmax(double2, double); double3 __ovld __cnfn fmax(double3, double); double4 __ovld __cnfn fmax(double4, double); double8 __ovld __cnfn fmax(double8, double); double16 __ovld __cnfn fmax(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fmax(half, half); half2 __ovld __cnfn fmax(half2, half2); half3 __ovld __cnfn fmax(half3, half3); half4 __ovld __cnfn fmax(half4, half4); half8 __ovld __cnfn fmax(half8, half8); half16 __ovld __cnfn fmax(half16, half16); half2 __ovld __cnfn fmax(half2, half); half3 __ovld __cnfn fmax(half3, half); half4 __ovld __cnfn fmax(half4, half); half8 __ovld __cnfn fmax(half8, half); half16 __ovld __cnfn fmax(half16, half); #endif //cl_khr_fp16 /** * Returns y if y < x, otherwise it returns x. If one * argument is a NaN, fmin() returns the other * argument. If both arguments are NaNs, fmin() * returns a NaN. 
*/ float __ovld __cnfn fmin(float, float); float2 __ovld __cnfn fmin(float2, float2); float3 __ovld __cnfn fmin(float3, float3); float4 __ovld __cnfn fmin(float4, float4); float8 __ovld __cnfn fmin(float8, float8); float16 __ovld __cnfn fmin(float16, float16); float2 __ovld __cnfn fmin(float2, float); float3 __ovld __cnfn fmin(float3, float); float4 __ovld __cnfn fmin(float4, float); float8 __ovld __cnfn fmin(float8, float); float16 __ovld __cnfn fmin(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn fmin(double, double); double2 __ovld __cnfn fmin(double2, double2); double3 __ovld __cnfn fmin(double3, double3); double4 __ovld __cnfn fmin(double4, double4); double8 __ovld __cnfn fmin(double8, double8); double16 __ovld __cnfn fmin(double16, double16); double2 __ovld __cnfn fmin(double2, double); double3 __ovld __cnfn fmin(double3, double); double4 __ovld __cnfn fmin(double4, double); double8 __ovld __cnfn fmin(double8, double); double16 __ovld __cnfn fmin(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fmin(half, half); half2 __ovld __cnfn fmin(half2, half2); half3 __ovld __cnfn fmin(half3, half3); half4 __ovld __cnfn fmin(half4, half4); half8 __ovld __cnfn fmin(half8, half8); half16 __ovld __cnfn fmin(half16, half16); half2 __ovld __cnfn fmin(half2, half); half3 __ovld __cnfn fmin(half3, half); half4 __ovld __cnfn fmin(half4, half); half8 __ovld __cnfn fmin(half8, half); half16 __ovld __cnfn fmin(half16, half); #endif //cl_khr_fp16 /** * Modulus. Returns x - y * trunc (x/y). 
*/ float __ovld __cnfn fmod(float, float); float2 __ovld __cnfn fmod(float2, float2); float3 __ovld __cnfn fmod(float3, float3); float4 __ovld __cnfn fmod(float4, float4); float8 __ovld __cnfn fmod(float8, float8); float16 __ovld __cnfn fmod(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn fmod(double, double); double2 __ovld __cnfn fmod(double2, double2); double3 __ovld __cnfn fmod(double3, double3); double4 __ovld __cnfn fmod(double4, double4); double8 __ovld __cnfn fmod(double8, double8); double16 __ovld __cnfn fmod(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn fmod(half, half); half2 __ovld __cnfn fmod(half2, half2); half3 __ovld __cnfn fmod(half3, half3); half4 __ovld __cnfn fmod(half4, half4); half8 __ovld __cnfn fmod(half8, half8); half16 __ovld __cnfn fmod(half16, half16); #endif //cl_khr_fp16 /** * Returns fmin(x - floor (x), 0x1.fffffep-1f ). * floor(x) is returned in iptr. */ #if defined(__opencl_c_generic_address_space) float __ovld fract(float, float *); float2 __ovld fract(float2, float2 *); float3 __ovld fract(float3, float3 *); float4 __ovld fract(float4, float4 *); float8 __ovld fract(float8, float8 *); float16 __ovld fract(float16, float16 *); #ifdef cl_khr_fp64 double __ovld fract(double, double *); double2 __ovld fract(double2, double2 *); double3 __ovld fract(double3, double3 *); double4 __ovld fract(double4, double4 *); double8 __ovld fract(double8, double8 *); double16 __ovld fract(double16, double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld fract(half, half *); half2 __ovld fract(half2, half2 *); half3 __ovld fract(half3, half3 *); half4 __ovld fract(half4, half4 *); half8 __ovld fract(half8, half8 *); half16 __ovld fract(half16, half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld fract(float, __global float *); float2 __ovld fract(float2, __global float2 *); float3 __ovld fract(float3, 
__global float3 *); float4 __ovld fract(float4, __global float4 *); float8 __ovld fract(float8, __global float8 *); float16 __ovld fract(float16, __global float16 *); float __ovld fract(float, __local float *); float2 __ovld fract(float2, __local float2 *); float3 __ovld fract(float3, __local float3 *); float4 __ovld fract(float4, __local float4 *); float8 __ovld fract(float8, __local float8 *); float16 __ovld fract(float16, __local float16 *); float __ovld fract(float, __private float *); float2 __ovld fract(float2, __private float2 *); float3 __ovld fract(float3, __private float3 *); float4 __ovld fract(float4, __private float4 *); float8 __ovld fract(float8, __private float8 *); float16 __ovld fract(float16, __private float16 *); #ifdef cl_khr_fp64 double __ovld fract(double, __global double *); double2 __ovld fract(double2, __global double2 *); double3 __ovld fract(double3, __global double3 *); double4 __ovld fract(double4, __global double4 *); double8 __ovld fract(double8, __global double8 *); double16 __ovld fract(double16, __global double16 *); double __ovld fract(double, __local double *); double2 __ovld fract(double2, __local double2 *); double3 __ovld fract(double3, __local double3 *); double4 __ovld fract(double4, __local double4 *); double8 __ovld fract(double8, __local double8 *); double16 __ovld fract(double16, __local double16 *); double __ovld fract(double, __private double *); double2 __ovld fract(double2, __private double2 *); double3 __ovld fract(double3, __private double3 *); double4 __ovld fract(double4, __private double4 *); double8 __ovld fract(double8, __private double8 *); double16 __ovld fract(double16, __private double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld fract(half, __global half *); half2 __ovld fract(half2, __global half2 *); half3 __ovld fract(half3, __global half3 *); half4 __ovld fract(half4, __global half4 *); half8 __ovld fract(half8, __global half8 *); half16 __ovld fract(half16, __global half16 *); half 
__ovld fract(half, __local half *); half2 __ovld fract(half2, __local half2 *); half3 __ovld fract(half3, __local half3 *); half4 __ovld fract(half4, __local half4 *); half8 __ovld fract(half8, __local half8 *); half16 __ovld fract(half16, __local half16 *); half __ovld fract(half, __private half *); half2 __ovld fract(half2, __private half2 *); half3 __ovld fract(half3, __private half3 *); half4 __ovld fract(half4, __private half4 *); half8 __ovld fract(half8, __private half8 *); half16 __ovld fract(half16, __private half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Extract mantissa and exponent from x. For each * component the mantissa returned is a float with * magnitude in the interval [1/2, 1) or 0. Each * component of x equals mantissa returned * 2^exp. */ #if defined(__opencl_c_generic_address_space) float __ovld frexp(float, int *); float2 __ovld frexp(float2, int2 *); float3 __ovld frexp(float3, int3 *); float4 __ovld frexp(float4, int4 *); float8 __ovld frexp(float8, int8 *); float16 __ovld frexp(float16, int16 *); #ifdef cl_khr_fp64 double __ovld frexp(double, int *); double2 __ovld frexp(double2, int2 *); double3 __ovld frexp(double3, int3 *); double4 __ovld frexp(double4, int4 *); double8 __ovld frexp(double8, int8 *); double16 __ovld frexp(double16, int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld frexp(half, int *); half2 __ovld frexp(half2, int2 *); half3 __ovld frexp(half3, int3 *); half4 __ovld frexp(half4, int4 *); half8 __ovld frexp(half8, int8 *); half16 __ovld frexp(half16, int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld frexp(float, __global int *); float2 __ovld frexp(float2, __global int2 *); float3 __ovld frexp(float3, __global int3 *); float4 __ovld frexp(float4, __global int4 *); float8 __ovld frexp(float8, __global int8 *); float16 __ovld frexp(float16, __global int16 *); float 
__ovld frexp(float, __local int *); float2 __ovld frexp(float2, __local int2 *); float3 __ovld frexp(float3, __local int3 *); float4 __ovld frexp(float4, __local int4 *); float8 __ovld frexp(float8, __local int8 *); float16 __ovld frexp(float16, __local int16 *); float __ovld frexp(float, __private int *); float2 __ovld frexp(float2, __private int2 *); float3 __ovld frexp(float3, __private int3 *); float4 __ovld frexp(float4, __private int4 *); float8 __ovld frexp(float8, __private int8 *); float16 __ovld frexp(float16, __private int16 *); #ifdef cl_khr_fp64 double __ovld frexp(double, __global int *); double2 __ovld frexp(double2, __global int2 *); double3 __ovld frexp(double3, __global int3 *); double4 __ovld frexp(double4, __global int4 *); double8 __ovld frexp(double8, __global int8 *); double16 __ovld frexp(double16, __global int16 *); double __ovld frexp(double, __local int *); double2 __ovld frexp(double2, __local int2 *); double3 __ovld frexp(double3, __local int3 *); double4 __ovld frexp(double4, __local int4 *); double8 __ovld frexp(double8, __local int8 *); double16 __ovld frexp(double16, __local int16 *); double __ovld frexp(double, __private int *); double2 __ovld frexp(double2, __private int2 *); double3 __ovld frexp(double3, __private int3 *); double4 __ovld frexp(double4, __private int4 *); double8 __ovld frexp(double8, __private int8 *); double16 __ovld frexp(double16, __private int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld frexp(half, __global int *); half2 __ovld frexp(half2, __global int2 *); half3 __ovld frexp(half3, __global int3 *); half4 __ovld frexp(half4, __global int4 *); half8 __ovld frexp(half8, __global int8 *); half16 __ovld frexp(half16, __global int16 *); half __ovld frexp(half, __local int *); half2 __ovld frexp(half2, __local int2 *); half3 __ovld frexp(half3, __local int3 *); half4 __ovld frexp(half4, __local int4 *); half8 __ovld frexp(half8, __local int8 *); half16 __ovld frexp(half16, __local int16 *); half 
__ovld frexp(half, __private int *); half2 __ovld frexp(half2, __private int2 *); half3 __ovld frexp(half3, __private int3 *); half4 __ovld frexp(half4, __private int4 *); half8 __ovld frexp(half8, __private int8 *); half16 __ovld frexp(half16, __private int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Compute the value of the square root of x^2 + y^2 * without undue overflow or underflow. */ float __ovld __cnfn hypot(float, float); float2 __ovld __cnfn hypot(float2, float2); float3 __ovld __cnfn hypot(float3, float3); float4 __ovld __cnfn hypot(float4, float4); float8 __ovld __cnfn hypot(float8, float8); float16 __ovld __cnfn hypot(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn hypot(double, double); double2 __ovld __cnfn hypot(double2, double2); double3 __ovld __cnfn hypot(double3, double3); double4 __ovld __cnfn hypot(double4, double4); double8 __ovld __cnfn hypot(double8, double8); double16 __ovld __cnfn hypot(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn hypot(half, half); half2 __ovld __cnfn hypot(half2, half2); half3 __ovld __cnfn hypot(half3, half3); half4 __ovld __cnfn hypot(half4, half4); half8 __ovld __cnfn hypot(half8, half8); half16 __ovld __cnfn hypot(half16, half16); #endif //cl_khr_fp16 /** * Return the exponent as an integer value. 
*/ int __ovld __cnfn ilogb(float); int2 __ovld __cnfn ilogb(float2); int3 __ovld __cnfn ilogb(float3); int4 __ovld __cnfn ilogb(float4); int8 __ovld __cnfn ilogb(float8); int16 __ovld __cnfn ilogb(float16); #ifdef cl_khr_fp64 int __ovld __cnfn ilogb(double); int2 __ovld __cnfn ilogb(double2); int3 __ovld __cnfn ilogb(double3); int4 __ovld __cnfn ilogb(double4); int8 __ovld __cnfn ilogb(double8); int16 __ovld __cnfn ilogb(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn ilogb(half); int2 __ovld __cnfn ilogb(half2); int3 __ovld __cnfn ilogb(half3); int4 __ovld __cnfn ilogb(half4); int8 __ovld __cnfn ilogb(half8); int16 __ovld __cnfn ilogb(half16); #endif //cl_khr_fp16 /** * Multiply x by 2 to the power n. */ float __ovld __cnfn ldexp(float, int); float2 __ovld __cnfn ldexp(float2, int2); float3 __ovld __cnfn ldexp(float3, int3); float4 __ovld __cnfn ldexp(float4, int4); float8 __ovld __cnfn ldexp(float8, int8); float16 __ovld __cnfn ldexp(float16, int16); float2 __ovld __cnfn ldexp(float2, int); float3 __ovld __cnfn ldexp(float3, int); float4 __ovld __cnfn ldexp(float4, int); float8 __ovld __cnfn ldexp(float8, int); float16 __ovld __cnfn ldexp(float16, int); #ifdef cl_khr_fp64 double __ovld __cnfn ldexp(double, int); double2 __ovld __cnfn ldexp(double2, int2); double3 __ovld __cnfn ldexp(double3, int3); double4 __ovld __cnfn ldexp(double4, int4); double8 __ovld __cnfn ldexp(double8, int8); double16 __ovld __cnfn ldexp(double16, int16); double2 __ovld __cnfn ldexp(double2, int); double3 __ovld __cnfn ldexp(double3, int); double4 __ovld __cnfn ldexp(double4, int); double8 __ovld __cnfn ldexp(double8, int); double16 __ovld __cnfn ldexp(double16, int); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn ldexp(half, int); half2 __ovld __cnfn ldexp(half2, int2); half3 __ovld __cnfn ldexp(half3, int3); half4 __ovld __cnfn ldexp(half4, int4); half8 __ovld __cnfn ldexp(half8, int8); half16 __ovld __cnfn ldexp(half16, int16); half2 __ovld __cnfn 
ldexp(half2, int); half3 __ovld __cnfn ldexp(half3, int); half4 __ovld __cnfn ldexp(half4, int); half8 __ovld __cnfn ldexp(half8, int); half16 __ovld __cnfn ldexp(half16, int); #endif //cl_khr_fp16 /** * Log gamma function. Returns the natural * logarithm of the absolute value of the gamma * function. The sign of the gamma function is * returned in the signp argument of lgamma_r. */ float __ovld __cnfn lgamma(float); float2 __ovld __cnfn lgamma(float2); float3 __ovld __cnfn lgamma(float3); float4 __ovld __cnfn lgamma(float4); float8 __ovld __cnfn lgamma(float8); float16 __ovld __cnfn lgamma(float16); #ifdef cl_khr_fp64 double __ovld __cnfn lgamma(double); double2 __ovld __cnfn lgamma(double2); double3 __ovld __cnfn lgamma(double3); double4 __ovld __cnfn lgamma(double4); double8 __ovld __cnfn lgamma(double8); double16 __ovld __cnfn lgamma(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn lgamma(half); half2 __ovld __cnfn lgamma(half2); half3 __ovld __cnfn lgamma(half3); half4 __ovld __cnfn lgamma(half4); half8 __ovld __cnfn lgamma(half8); half16 __ovld __cnfn lgamma(half16); #endif //cl_khr_fp16 #if defined(__opencl_c_generic_address_space) float __ovld lgamma_r(float, int *); float2 __ovld lgamma_r(float2, int2 *); float3 __ovld lgamma_r(float3, int3 *); float4 __ovld lgamma_r(float4, int4 *); float8 __ovld lgamma_r(float8, int8 *); float16 __ovld lgamma_r(float16, int16 *); #ifdef cl_khr_fp64 double __ovld lgamma_r(double, int *); double2 __ovld lgamma_r(double2, int2 *); double3 __ovld lgamma_r(double3, int3 *); double4 __ovld lgamma_r(double4, int4 *); double8 __ovld lgamma_r(double8, int8 *); double16 __ovld lgamma_r(double16, int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld lgamma_r(half, int *); half2 __ovld lgamma_r(half2, int2 *); half3 __ovld lgamma_r(half3, int3 *); half4 __ovld lgamma_r(half4, int4 *); half8 __ovld lgamma_r(half8, int8 *); half16 __ovld lgamma_r(half16, int16 *); #endif //cl_khr_fp16 #endif 
//defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld lgamma_r(float, __global int *); float2 __ovld lgamma_r(float2, __global int2 *); float3 __ovld lgamma_r(float3, __global int3 *); float4 __ovld lgamma_r(float4, __global int4 *); float8 __ovld lgamma_r(float8, __global int8 *); float16 __ovld lgamma_r(float16, __global int16 *); float __ovld lgamma_r(float, __local int *); float2 __ovld lgamma_r(float2, __local int2 *); float3 __ovld lgamma_r(float3, __local int3 *); float4 __ovld lgamma_r(float4, __local int4 *); float8 __ovld lgamma_r(float8, __local int8 *); float16 __ovld lgamma_r(float16, __local int16 *); float __ovld lgamma_r(float, __private int *); float2 __ovld lgamma_r(float2, __private int2 *); float3 __ovld lgamma_r(float3, __private int3 *); float4 __ovld lgamma_r(float4, __private int4 *); float8 __ovld lgamma_r(float8, __private int8 *); float16 __ovld lgamma_r(float16, __private int16 *); #ifdef cl_khr_fp64 double __ovld lgamma_r(double, __global int *); double2 __ovld lgamma_r(double2, __global int2 *); double3 __ovld lgamma_r(double3, __global int3 *); double4 __ovld lgamma_r(double4, __global int4 *); double8 __ovld lgamma_r(double8, __global int8 *); double16 __ovld lgamma_r(double16, __global int16 *); double __ovld lgamma_r(double, __local int *); double2 __ovld lgamma_r(double2, __local int2 *); double3 __ovld lgamma_r(double3, __local int3 *); double4 __ovld lgamma_r(double4, __local int4 *); double8 __ovld lgamma_r(double8, __local int8 *); double16 __ovld lgamma_r(double16, __local int16 *); double __ovld lgamma_r(double, __private int *); double2 __ovld lgamma_r(double2, __private int2 *); double3 __ovld lgamma_r(double3, __private int3 *); double4 __ovld lgamma_r(double4, __private int4 *); double8 __ovld lgamma_r(double8, __private int8 *); double16 __ovld lgamma_r(double16, __private int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld lgamma_r(half, __global int *); 
half2 __ovld lgamma_r(half2, __global int2 *); half3 __ovld lgamma_r(half3, __global int3 *); half4 __ovld lgamma_r(half4, __global int4 *); half8 __ovld lgamma_r(half8, __global int8 *); half16 __ovld lgamma_r(half16, __global int16 *); half __ovld lgamma_r(half, __local int *); half2 __ovld lgamma_r(half2, __local int2 *); half3 __ovld lgamma_r(half3, __local int3 *); half4 __ovld lgamma_r(half4, __local int4 *); half8 __ovld lgamma_r(half8, __local int8 *); half16 __ovld lgamma_r(half16, __local int16 *); half __ovld lgamma_r(half, __private int *); half2 __ovld lgamma_r(half2, __private int2 *); half3 __ovld lgamma_r(half3, __private int3 *); half4 __ovld lgamma_r(half4, __private int4 *); half8 __ovld lgamma_r(half8, __private int8 *); half16 __ovld lgamma_r(half16, __private int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Compute natural logarithm. */ float __ovld __cnfn log(float); float2 __ovld __cnfn log(float2); float3 __ovld __cnfn log(float3); float4 __ovld __cnfn log(float4); float8 __ovld __cnfn log(float8); float16 __ovld __cnfn log(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log(double); double2 __ovld __cnfn log(double2); double3 __ovld __cnfn log(double3); double4 __ovld __cnfn log(double4); double8 __ovld __cnfn log(double8); double16 __ovld __cnfn log(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log(half); half2 __ovld __cnfn log(half2); half3 __ovld __cnfn log(half3); half4 __ovld __cnfn log(half4); half8 __ovld __cnfn log(half8); half16 __ovld __cnfn log(half16); #endif //cl_khr_fp16 /** * Compute a base 2 logarithm. 
*/ float __ovld __cnfn log2(float); float2 __ovld __cnfn log2(float2); float3 __ovld __cnfn log2(float3); float4 __ovld __cnfn log2(float4); float8 __ovld __cnfn log2(float8); float16 __ovld __cnfn log2(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log2(double); double2 __ovld __cnfn log2(double2); double3 __ovld __cnfn log2(double3); double4 __ovld __cnfn log2(double4); double8 __ovld __cnfn log2(double8); double16 __ovld __cnfn log2(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log2(half); half2 __ovld __cnfn log2(half2); half3 __ovld __cnfn log2(half3); half4 __ovld __cnfn log2(half4); half8 __ovld __cnfn log2(half8); half16 __ovld __cnfn log2(half16); #endif //cl_khr_fp16 /** * Compute a base 10 logarithm. */ float __ovld __cnfn log10(float); float2 __ovld __cnfn log10(float2); float3 __ovld __cnfn log10(float3); float4 __ovld __cnfn log10(float4); float8 __ovld __cnfn log10(float8); float16 __ovld __cnfn log10(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log10(double); double2 __ovld __cnfn log10(double2); double3 __ovld __cnfn log10(double3); double4 __ovld __cnfn log10(double4); double8 __ovld __cnfn log10(double8); double16 __ovld __cnfn log10(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log10(half); half2 __ovld __cnfn log10(half2); half3 __ovld __cnfn log10(half3); half4 __ovld __cnfn log10(half4); half8 __ovld __cnfn log10(half8); half16 __ovld __cnfn log10(half16); #endif //cl_khr_fp16 /** * Compute a base e logarithm of (1.0 + x). 
*/ float __ovld __cnfn log1p(float); float2 __ovld __cnfn log1p(float2); float3 __ovld __cnfn log1p(float3); float4 __ovld __cnfn log1p(float4); float8 __ovld __cnfn log1p(float8); float16 __ovld __cnfn log1p(float16); #ifdef cl_khr_fp64 double __ovld __cnfn log1p(double); double2 __ovld __cnfn log1p(double2); double3 __ovld __cnfn log1p(double3); double4 __ovld __cnfn log1p(double4); double8 __ovld __cnfn log1p(double8); double16 __ovld __cnfn log1p(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn log1p(half); half2 __ovld __cnfn log1p(half2); half3 __ovld __cnfn log1p(half3); half4 __ovld __cnfn log1p(half4); half8 __ovld __cnfn log1p(half8); half16 __ovld __cnfn log1p(half16); #endif //cl_khr_fp16 /** * Compute the exponent of x, which is the integral * part of logr | x |. */ float __ovld __cnfn logb(float); float2 __ovld __cnfn logb(float2); float3 __ovld __cnfn logb(float3); float4 __ovld __cnfn logb(float4); float8 __ovld __cnfn logb(float8); float16 __ovld __cnfn logb(float16); #ifdef cl_khr_fp64 double __ovld __cnfn logb(double); double2 __ovld __cnfn logb(double2); double3 __ovld __cnfn logb(double3); double4 __ovld __cnfn logb(double4); double8 __ovld __cnfn logb(double8); double16 __ovld __cnfn logb(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn logb(half); half2 __ovld __cnfn logb(half2); half3 __ovld __cnfn logb(half3); half4 __ovld __cnfn logb(half4); half8 __ovld __cnfn logb(half8); half16 __ovld __cnfn logb(half16); #endif //cl_khr_fp16 /** * mad approximates a * b + c. Whether or how the * product of a * b is rounded and how supernormal or * subnormal intermediate products are handled is not * defined. mad is intended to be used where speed is * preferred over accuracy. 
*/ float __ovld __cnfn mad(float, float, float); float2 __ovld __cnfn mad(float2, float2, float2); float3 __ovld __cnfn mad(float3, float3, float3); float4 __ovld __cnfn mad(float4, float4, float4); float8 __ovld __cnfn mad(float8, float8, float8); float16 __ovld __cnfn mad(float16, float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn mad(double, double, double); double2 __ovld __cnfn mad(double2, double2, double2); double3 __ovld __cnfn mad(double3, double3, double3); double4 __ovld __cnfn mad(double4, double4, double4); double8 __ovld __cnfn mad(double8, double8, double8); double16 __ovld __cnfn mad(double16, double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn mad(half, half, half); half2 __ovld __cnfn mad(half2, half2, half2); half3 __ovld __cnfn mad(half3, half3, half3); half4 __ovld __cnfn mad(half4, half4, half4); half8 __ovld __cnfn mad(half8, half8, half8); half16 __ovld __cnfn mad(half16, half16, half16); #endif //cl_khr_fp16 /** * Returns x if | x | > | y |, y if | y | > | x |, otherwise * fmax(x, y). 
*/ float __ovld __cnfn maxmag(float, float); float2 __ovld __cnfn maxmag(float2, float2); float3 __ovld __cnfn maxmag(float3, float3); float4 __ovld __cnfn maxmag(float4, float4); float8 __ovld __cnfn maxmag(float8, float8); float16 __ovld __cnfn maxmag(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn maxmag(double, double); double2 __ovld __cnfn maxmag(double2, double2); double3 __ovld __cnfn maxmag(double3, double3); double4 __ovld __cnfn maxmag(double4, double4); double8 __ovld __cnfn maxmag(double8, double8); double16 __ovld __cnfn maxmag(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn maxmag(half, half); half2 __ovld __cnfn maxmag(half2, half2); half3 __ovld __cnfn maxmag(half3, half3); half4 __ovld __cnfn maxmag(half4, half4); half8 __ovld __cnfn maxmag(half8, half8); half16 __ovld __cnfn maxmag(half16, half16); #endif //cl_khr_fp16 /** * Returns x if | x | < | y |, y if | y | < | x |, otherwise * fmin(x, y). */ float __ovld __cnfn minmag(float, float); float2 __ovld __cnfn minmag(float2, float2); float3 __ovld __cnfn minmag(float3, float3); float4 __ovld __cnfn minmag(float4, float4); float8 __ovld __cnfn minmag(float8, float8); float16 __ovld __cnfn minmag(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn minmag(double, double); double2 __ovld __cnfn minmag(double2, double2); double3 __ovld __cnfn minmag(double3, double3); double4 __ovld __cnfn minmag(double4, double4); double8 __ovld __cnfn minmag(double8, double8); double16 __ovld __cnfn minmag(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn minmag(half, half); half2 __ovld __cnfn minmag(half2, half2); half3 __ovld __cnfn minmag(half3, half3); half4 __ovld __cnfn minmag(half4, half4); half8 __ovld __cnfn minmag(half8, half8); half16 __ovld __cnfn minmag(half16, half16); #endif //cl_khr_fp16 /** * Decompose a floating-point number. 
The modf * function breaks the argument x into integral and * fractional parts, each of which has the same sign as * the argument. It stores the integral part in the object * pointed to by iptr. */ #if defined(__opencl_c_generic_address_space) float __ovld modf(float, float *); float2 __ovld modf(float2, float2 *); float3 __ovld modf(float3, float3 *); float4 __ovld modf(float4, float4 *); float8 __ovld modf(float8, float8 *); float16 __ovld modf(float16, float16 *); #ifdef cl_khr_fp64 double __ovld modf(double, double *); double2 __ovld modf(double2, double2 *); double3 __ovld modf(double3, double3 *); double4 __ovld modf(double4, double4 *); double8 __ovld modf(double8, double8 *); double16 __ovld modf(double16, double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld modf(half, half *); half2 __ovld modf(half2, half2 *); half3 __ovld modf(half3, half3 *); half4 __ovld modf(half4, half4 *); half8 __ovld modf(half8, half8 *); half16 __ovld modf(half16, half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld modf(float, __global float *); float2 __ovld modf(float2, __global float2 *); float3 __ovld modf(float3, __global float3 *); float4 __ovld modf(float4, __global float4 *); float8 __ovld modf(float8, __global float8 *); float16 __ovld modf(float16, __global float16 *); float __ovld modf(float, __local float *); float2 __ovld modf(float2, __local float2 *); float3 __ovld modf(float3, __local float3 *); float4 __ovld modf(float4, __local float4 *); float8 __ovld modf(float8, __local float8 *); float16 __ovld modf(float16, __local float16 *); float __ovld modf(float, __private float *); float2 __ovld modf(float2, __private float2 *); float3 __ovld modf(float3, __private float3 *); float4 __ovld modf(float4, __private float4 *); float8 __ovld modf(float8, __private float8 *); float16 __ovld modf(float16, __private float16 *); #ifdef cl_khr_fp64 double __ovld 
modf(double, __global double *); double2 __ovld modf(double2, __global double2 *); double3 __ovld modf(double3, __global double3 *); double4 __ovld modf(double4, __global double4 *); double8 __ovld modf(double8, __global double8 *); double16 __ovld modf(double16, __global double16 *); double __ovld modf(double, __local double *); double2 __ovld modf(double2, __local double2 *); double3 __ovld modf(double3, __local double3 *); double4 __ovld modf(double4, __local double4 *); double8 __ovld modf(double8, __local double8 *); double16 __ovld modf(double16, __local double16 *); double __ovld modf(double, __private double *); double2 __ovld modf(double2, __private double2 *); double3 __ovld modf(double3, __private double3 *); double4 __ovld modf(double4, __private double4 *); double8 __ovld modf(double8, __private double8 *); double16 __ovld modf(double16, __private double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld modf(half, __global half *); half2 __ovld modf(half2, __global half2 *); half3 __ovld modf(half3, __global half3 *); half4 __ovld modf(half4, __global half4 *); half8 __ovld modf(half8, __global half8 *); half16 __ovld modf(half16, __global half16 *); half __ovld modf(half, __local half *); half2 __ovld modf(half2, __local half2 *); half3 __ovld modf(half3, __local half3 *); half4 __ovld modf(half4, __local half4 *); half8 __ovld modf(half8, __local half8 *); half16 __ovld modf(half16, __local half16 *); half __ovld modf(half, __private half *); half2 __ovld modf(half2, __private half2 *); half3 __ovld modf(half3, __private half3 *); half4 __ovld modf(half4, __private half4 *); half8 __ovld modf(half8, __private half8 *); half16 __ovld modf(half16, __private half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Returns a quiet NaN. The nancode may be placed * in the significand of the resulting NaN. 
*/ float __ovld __cnfn nan(uint); float2 __ovld __cnfn nan(uint2); float3 __ovld __cnfn nan(uint3); float4 __ovld __cnfn nan(uint4); float8 __ovld __cnfn nan(uint8); float16 __ovld __cnfn nan(uint16); #ifdef cl_khr_fp64 double __ovld __cnfn nan(ulong); double2 __ovld __cnfn nan(ulong2); double3 __ovld __cnfn nan(ulong3); double4 __ovld __cnfn nan(ulong4); double8 __ovld __cnfn nan(ulong8); double16 __ovld __cnfn nan(ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn nan(ushort); half2 __ovld __cnfn nan(ushort2); half3 __ovld __cnfn nan(ushort3); half4 __ovld __cnfn nan(ushort4); half8 __ovld __cnfn nan(ushort8); half16 __ovld __cnfn nan(ushort16); #endif //cl_khr_fp16 /** * Computes the next representable single-precision * floating-point value following x in the direction of * y. Thus, if y is less than x, nextafter() returns the * largest representable floating-point number less * than x. */ float __ovld __cnfn nextafter(float, float); float2 __ovld __cnfn nextafter(float2, float2); float3 __ovld __cnfn nextafter(float3, float3); float4 __ovld __cnfn nextafter(float4, float4); float8 __ovld __cnfn nextafter(float8, float8); float16 __ovld __cnfn nextafter(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn nextafter(double, double); double2 __ovld __cnfn nextafter(double2, double2); double3 __ovld __cnfn nextafter(double3, double3); double4 __ovld __cnfn nextafter(double4, double4); double8 __ovld __cnfn nextafter(double8, double8); double16 __ovld __cnfn nextafter(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn nextafter(half, half); half2 __ovld __cnfn nextafter(half2, half2); half3 __ovld __cnfn nextafter(half3, half3); half4 __ovld __cnfn nextafter(half4, half4); half8 __ovld __cnfn nextafter(half8, half8); half16 __ovld __cnfn nextafter(half16, half16); #endif //cl_khr_fp16 /** * Compute x to the power y. 
*/ float __ovld __cnfn pow(float, float); float2 __ovld __cnfn pow(float2, float2); float3 __ovld __cnfn pow(float3, float3); float4 __ovld __cnfn pow(float4, float4); float8 __ovld __cnfn pow(float8, float8); float16 __ovld __cnfn pow(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn pow(double, double); double2 __ovld __cnfn pow(double2, double2); double3 __ovld __cnfn pow(double3, double3); double4 __ovld __cnfn pow(double4, double4); double8 __ovld __cnfn pow(double8, double8); double16 __ovld __cnfn pow(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn pow(half, half); half2 __ovld __cnfn pow(half2, half2); half3 __ovld __cnfn pow(half3, half3); half4 __ovld __cnfn pow(half4, half4); half8 __ovld __cnfn pow(half8, half8); half16 __ovld __cnfn pow(half16, half16); #endif //cl_khr_fp16 /** * Compute x to the power y, where y is an integer. */ float __ovld __cnfn pown(float, int); float2 __ovld __cnfn pown(float2, int2); float3 __ovld __cnfn pown(float3, int3); float4 __ovld __cnfn pown(float4, int4); float8 __ovld __cnfn pown(float8, int8); float16 __ovld __cnfn pown(float16, int16); #ifdef cl_khr_fp64 double __ovld __cnfn pown(double, int); double2 __ovld __cnfn pown(double2, int2); double3 __ovld __cnfn pown(double3, int3); double4 __ovld __cnfn pown(double4, int4); double8 __ovld __cnfn pown(double8, int8); double16 __ovld __cnfn pown(double16, int16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn pown(half, int); half2 __ovld __cnfn pown(half2, int2); half3 __ovld __cnfn pown(half3, int3); half4 __ovld __cnfn pown(half4, int4); half8 __ovld __cnfn pown(half8, int8); half16 __ovld __cnfn pown(half16, int16); #endif //cl_khr_fp16 /** * Compute x to the power y, where x is >= 0. 
*/ float __ovld __cnfn powr(float, float); float2 __ovld __cnfn powr(float2, float2); float3 __ovld __cnfn powr(float3, float3); float4 __ovld __cnfn powr(float4, float4); float8 __ovld __cnfn powr(float8, float8); float16 __ovld __cnfn powr(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn powr(double, double); double2 __ovld __cnfn powr(double2, double2); double3 __ovld __cnfn powr(double3, double3); double4 __ovld __cnfn powr(double4, double4); double8 __ovld __cnfn powr(double8, double8); double16 __ovld __cnfn powr(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn powr(half, half); half2 __ovld __cnfn powr(half2, half2); half3 __ovld __cnfn powr(half3, half3); half4 __ovld __cnfn powr(half4, half4); half8 __ovld __cnfn powr(half8, half8); half16 __ovld __cnfn powr(half16, half16); #endif //cl_khr_fp16 /** * Compute the value r such that r = x - n*y, where n * is the integer nearest the exact value of x/y. If there * are two integers closest to x/y, n shall be the even * one. If r is zero, it is given the same sign as x. 
*/ float __ovld __cnfn remainder(float, float); float2 __ovld __cnfn remainder(float2, float2); float3 __ovld __cnfn remainder(float3, float3); float4 __ovld __cnfn remainder(float4, float4); float8 __ovld __cnfn remainder(float8, float8); float16 __ovld __cnfn remainder(float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn remainder(double, double); double2 __ovld __cnfn remainder(double2, double2); double3 __ovld __cnfn remainder(double3, double3); double4 __ovld __cnfn remainder(double4, double4); double8 __ovld __cnfn remainder(double8, double8); double16 __ovld __cnfn remainder(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn remainder(half, half); half2 __ovld __cnfn remainder(half2, half2); half3 __ovld __cnfn remainder(half3, half3); half4 __ovld __cnfn remainder(half4, half4); half8 __ovld __cnfn remainder(half8, half8); half16 __ovld __cnfn remainder(half16, half16); #endif //cl_khr_fp16 /** * The remquo function computes the value r such * that r = x - n*y, where n is the integer nearest the * exact value of x/y. If there are two integers closest * to x/y, n shall be the even one. If r is zero, it is * given the same sign as x. This is the same value * that is returned by the remainder function. * remquo also calculates the lower seven bits of the * integral quotient x/y, and gives that value the same * sign as x/y. It stores this signed value in the object * pointed to by quo. 
*/ #if defined(__opencl_c_generic_address_space) float __ovld remquo(float, float, int *); float2 __ovld remquo(float2, float2, int2 *); float3 __ovld remquo(float3, float3, int3 *); float4 __ovld remquo(float4, float4, int4 *); float8 __ovld remquo(float8, float8, int8 *); float16 __ovld remquo(float16, float16, int16 *); #ifdef cl_khr_fp64 double __ovld remquo(double, double, int *); double2 __ovld remquo(double2, double2, int2 *); double3 __ovld remquo(double3, double3, int3 *); double4 __ovld remquo(double4, double4, int4 *); double8 __ovld remquo(double8, double8, int8 *); double16 __ovld remquo(double16, double16, int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld remquo(half, half, int *); half2 __ovld remquo(half2, half2, int2 *); half3 __ovld remquo(half3, half3, int3 *); half4 __ovld remquo(half4, half4, int4 *); half8 __ovld remquo(half8, half8, int8 *); half16 __ovld remquo(half16, half16, int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld remquo(float, float, __global int *); float2 __ovld remquo(float2, float2, __global int2 *); float3 __ovld remquo(float3, float3, __global int3 *); float4 __ovld remquo(float4, float4, __global int4 *); float8 __ovld remquo(float8, float8, __global int8 *); float16 __ovld remquo(float16, float16, __global int16 *); float __ovld remquo(float, float, __local int *); float2 __ovld remquo(float2, float2, __local int2 *); float3 __ovld remquo(float3, float3, __local int3 *); float4 __ovld remquo(float4, float4, __local int4 *); float8 __ovld remquo(float8, float8, __local int8 *); float16 __ovld remquo(float16, float16, __local int16 *); float __ovld remquo(float, float, __private int *); float2 __ovld remquo(float2, float2, __private int2 *); float3 __ovld remquo(float3, float3, __private int3 *); float4 __ovld remquo(float4, float4, __private int4 *); float8 __ovld remquo(float8, float8, __private int8 *); float16 
__ovld remquo(float16, float16, __private int16 *); #ifdef cl_khr_fp64 double __ovld remquo(double, double, __global int *); double2 __ovld remquo(double2, double2, __global int2 *); double3 __ovld remquo(double3, double3, __global int3 *); double4 __ovld remquo(double4, double4, __global int4 *); double8 __ovld remquo(double8, double8, __global int8 *); double16 __ovld remquo(double16, double16, __global int16 *); double __ovld remquo(double, double, __local int *); double2 __ovld remquo(double2, double2, __local int2 *); double3 __ovld remquo(double3, double3, __local int3 *); double4 __ovld remquo(double4, double4, __local int4 *); double8 __ovld remquo(double8, double8, __local int8 *); double16 __ovld remquo(double16, double16, __local int16 *); double __ovld remquo(double, double, __private int *); double2 __ovld remquo(double2, double2, __private int2 *); double3 __ovld remquo(double3, double3, __private int3 *); double4 __ovld remquo(double4, double4, __private int4 *); double8 __ovld remquo(double8, double8, __private int8 *); double16 __ovld remquo(double16, double16, __private int16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld remquo(half, half, __global int *); half2 __ovld remquo(half2, half2, __global int2 *); half3 __ovld remquo(half3, half3, __global int3 *); half4 __ovld remquo(half4, half4, __global int4 *); half8 __ovld remquo(half8, half8, __global int8 *); half16 __ovld remquo(half16, half16, __global int16 *); half __ovld remquo(half, half, __local int *); half2 __ovld remquo(half2, half2, __local int2 *); half3 __ovld remquo(half3, half3, __local int3 *); half4 __ovld remquo(half4, half4, __local int4 *); half8 __ovld remquo(half8, half8, __local int8 *); half16 __ovld remquo(half16, half16, __local int16 *); half __ovld remquo(half, half, __private int *); half2 __ovld remquo(half2, half2, __private int2 *); half3 __ovld remquo(half3, half3, __private int3 *); half4 __ovld remquo(half4, half4, __private int4 *); half8 __ovld 
remquo(half8, half8, __private int8 *); half16 __ovld remquo(half16, half16, __private int16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Round to integral value (using round to nearest * even rounding mode) in floating-point format. * Refer to section 7.1 for description of rounding * modes. */ float __ovld __cnfn rint(float); float2 __ovld __cnfn rint(float2); float3 __ovld __cnfn rint(float3); float4 __ovld __cnfn rint(float4); float8 __ovld __cnfn rint(float8); float16 __ovld __cnfn rint(float16); #ifdef cl_khr_fp64 double __ovld __cnfn rint(double); double2 __ovld __cnfn rint(double2); double3 __ovld __cnfn rint(double3); double4 __ovld __cnfn rint(double4); double8 __ovld __cnfn rint(double8); double16 __ovld __cnfn rint(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn rint(half); half2 __ovld __cnfn rint(half2); half3 __ovld __cnfn rint(half3); half4 __ovld __cnfn rint(half4); half8 __ovld __cnfn rint(half8); half16 __ovld __cnfn rint(half16); #endif //cl_khr_fp16 /** * Compute x to the power 1/y. 
*/ float __ovld __cnfn rootn(float, int); float2 __ovld __cnfn rootn(float2, int2); float3 __ovld __cnfn rootn(float3, int3); float4 __ovld __cnfn rootn(float4, int4); float8 __ovld __cnfn rootn(float8, int8); float16 __ovld __cnfn rootn(float16, int16); #ifdef cl_khr_fp64 double __ovld __cnfn rootn(double, int); double2 __ovld __cnfn rootn(double2, int2); double3 __ovld __cnfn rootn(double3, int3); double4 __ovld __cnfn rootn(double4, int4); double8 __ovld __cnfn rootn(double8, int8); double16 __ovld __cnfn rootn(double16, int16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn rootn(half, int); half2 __ovld __cnfn rootn(half2, int2); half3 __ovld __cnfn rootn(half3, int3); half4 __ovld __cnfn rootn(half4, int4); half8 __ovld __cnfn rootn(half8, int8); half16 __ovld __cnfn rootn(half16, int16); #endif //cl_khr_fp16 /** * Return the integral value nearest to x rounding * halfway cases away from zero, regardless of the * current rounding direction. */ float __ovld __cnfn round(float); float2 __ovld __cnfn round(float2); float3 __ovld __cnfn round(float3); float4 __ovld __cnfn round(float4); float8 __ovld __cnfn round(float8); float16 __ovld __cnfn round(float16); #ifdef cl_khr_fp64 double __ovld __cnfn round(double); double2 __ovld __cnfn round(double2); double3 __ovld __cnfn round(double3); double4 __ovld __cnfn round(double4); double8 __ovld __cnfn round(double8); double16 __ovld __cnfn round(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn round(half); half2 __ovld __cnfn round(half2); half3 __ovld __cnfn round(half3); half4 __ovld __cnfn round(half4); half8 __ovld __cnfn round(half8); half16 __ovld __cnfn round(half16); #endif //cl_khr_fp16 /** * Compute inverse square root. 
*/ float __ovld __cnfn rsqrt(float); float2 __ovld __cnfn rsqrt(float2); float3 __ovld __cnfn rsqrt(float3); float4 __ovld __cnfn rsqrt(float4); float8 __ovld __cnfn rsqrt(float8); float16 __ovld __cnfn rsqrt(float16); #ifdef cl_khr_fp64 double __ovld __cnfn rsqrt(double); double2 __ovld __cnfn rsqrt(double2); double3 __ovld __cnfn rsqrt(double3); double4 __ovld __cnfn rsqrt(double4); double8 __ovld __cnfn rsqrt(double8); double16 __ovld __cnfn rsqrt(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn rsqrt(half); half2 __ovld __cnfn rsqrt(half2); half3 __ovld __cnfn rsqrt(half3); half4 __ovld __cnfn rsqrt(half4); half8 __ovld __cnfn rsqrt(half8); half16 __ovld __cnfn rsqrt(half16); #endif //cl_khr_fp16 /** * Compute sine. */ float __ovld __cnfn sin(float); float2 __ovld __cnfn sin(float2); float3 __ovld __cnfn sin(float3); float4 __ovld __cnfn sin(float4); float8 __ovld __cnfn sin(float8); float16 __ovld __cnfn sin(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sin(double); double2 __ovld __cnfn sin(double2); double3 __ovld __cnfn sin(double3); double4 __ovld __cnfn sin(double4); double8 __ovld __cnfn sin(double8); double16 __ovld __cnfn sin(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sin(half); half2 __ovld __cnfn sin(half2); half3 __ovld __cnfn sin(half3); half4 __ovld __cnfn sin(half4); half8 __ovld __cnfn sin(half8); half16 __ovld __cnfn sin(half16); #endif //cl_khr_fp16 /** * Compute sine and cosine of x. The computed sine * is the return value and computed cosine is returned * in cosval. 
*/ #if defined(__opencl_c_generic_address_space) float __ovld sincos(float, float *); float2 __ovld sincos(float2, float2 *); float3 __ovld sincos(float3, float3 *); float4 __ovld sincos(float4, float4 *); float8 __ovld sincos(float8, float8 *); float16 __ovld sincos(float16, float16 *); #ifdef cl_khr_fp64 double __ovld sincos(double, double *); double2 __ovld sincos(double2, double2 *); double3 __ovld sincos(double3, double3 *); double4 __ovld sincos(double4, double4 *); double8 __ovld sincos(double8, double8 *); double16 __ovld sincos(double16, double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld sincos(half, half *); half2 __ovld sincos(half2, half2 *); half3 __ovld sincos(half3, half3 *); half4 __ovld sincos(half4, half4 *); half8 __ovld sincos(half8, half8 *); half16 __ovld sincos(half16, half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld sincos(float, __global float *); float2 __ovld sincos(float2, __global float2 *); float3 __ovld sincos(float3, __global float3 *); float4 __ovld sincos(float4, __global float4 *); float8 __ovld sincos(float8, __global float8 *); float16 __ovld sincos(float16, __global float16 *); float __ovld sincos(float, __local float *); float2 __ovld sincos(float2, __local float2 *); float3 __ovld sincos(float3, __local float3 *); float4 __ovld sincos(float4, __local float4 *); float8 __ovld sincos(float8, __local float8 *); float16 __ovld sincos(float16, __local float16 *); float __ovld sincos(float, __private float *); float2 __ovld sincos(float2, __private float2 *); float3 __ovld sincos(float3, __private float3 *); float4 __ovld sincos(float4, __private float4 *); float8 __ovld sincos(float8, __private float8 *); float16 __ovld sincos(float16, __private float16 *); #ifdef cl_khr_fp64 double __ovld sincos(double, __global double *); double2 __ovld sincos(double2, __global double2 *); double3 __ovld sincos(double3, __global 
double3 *); double4 __ovld sincos(double4, __global double4 *); double8 __ovld sincos(double8, __global double8 *); double16 __ovld sincos(double16, __global double16 *); double __ovld sincos(double, __local double *); double2 __ovld sincos(double2, __local double2 *); double3 __ovld sincos(double3, __local double3 *); double4 __ovld sincos(double4, __local double4 *); double8 __ovld sincos(double8, __local double8 *); double16 __ovld sincos(double16, __local double16 *); double __ovld sincos(double, __private double *); double2 __ovld sincos(double2, __private double2 *); double3 __ovld sincos(double3, __private double3 *); double4 __ovld sincos(double4, __private double4 *); double8 __ovld sincos(double8, __private double8 *); double16 __ovld sincos(double16, __private double16 *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld sincos(half, __global half *); half2 __ovld sincos(half2, __global half2 *); half3 __ovld sincos(half3, __global half3 *); half4 __ovld sincos(half4, __global half4 *); half8 __ovld sincos(half8, __global half8 *); half16 __ovld sincos(half16, __global half16 *); half __ovld sincos(half, __local half *); half2 __ovld sincos(half2, __local half2 *); half3 __ovld sincos(half3, __local half3 *); half4 __ovld sincos(half4, __local half4 *); half8 __ovld sincos(half8, __local half8 *); half16 __ovld sincos(half16, __local half16 *); half __ovld sincos(half, __private half *); half2 __ovld sincos(half2, __private half2 *); half3 __ovld sincos(half3, __private half3 *); half4 __ovld sincos(half4, __private half4 *); half8 __ovld sincos(half8, __private half8 *); half16 __ovld sincos(half16, __private half16 *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Compute hyperbolic sine. 
*/ float __ovld __cnfn sinh(float); float2 __ovld __cnfn sinh(float2); float3 __ovld __cnfn sinh(float3); float4 __ovld __cnfn sinh(float4); float8 __ovld __cnfn sinh(float8); float16 __ovld __cnfn sinh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sinh(double); double2 __ovld __cnfn sinh(double2); double3 __ovld __cnfn sinh(double3); double4 __ovld __cnfn sinh(double4); double8 __ovld __cnfn sinh(double8); double16 __ovld __cnfn sinh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sinh(half); half2 __ovld __cnfn sinh(half2); half3 __ovld __cnfn sinh(half3); half4 __ovld __cnfn sinh(half4); half8 __ovld __cnfn sinh(half8); half16 __ovld __cnfn sinh(half16); #endif //cl_khr_fp16 /** * Compute sin (PI * x). */ float __ovld __cnfn sinpi(float); float2 __ovld __cnfn sinpi(float2); float3 __ovld __cnfn sinpi(float3); float4 __ovld __cnfn sinpi(float4); float8 __ovld __cnfn sinpi(float8); float16 __ovld __cnfn sinpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sinpi(double); double2 __ovld __cnfn sinpi(double2); double3 __ovld __cnfn sinpi(double3); double4 __ovld __cnfn sinpi(double4); double8 __ovld __cnfn sinpi(double8); double16 __ovld __cnfn sinpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sinpi(half); half2 __ovld __cnfn sinpi(half2); half3 __ovld __cnfn sinpi(half3); half4 __ovld __cnfn sinpi(half4); half8 __ovld __cnfn sinpi(half8); half16 __ovld __cnfn sinpi(half16); #endif //cl_khr_fp16 /** * Compute square root. 
*/ float __ovld __cnfn sqrt(float); float2 __ovld __cnfn sqrt(float2); float3 __ovld __cnfn sqrt(float3); float4 __ovld __cnfn sqrt(float4); float8 __ovld __cnfn sqrt(float8); float16 __ovld __cnfn sqrt(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sqrt(double); double2 __ovld __cnfn sqrt(double2); double3 __ovld __cnfn sqrt(double3); double4 __ovld __cnfn sqrt(double4); double8 __ovld __cnfn sqrt(double8); double16 __ovld __cnfn sqrt(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sqrt(half); half2 __ovld __cnfn sqrt(half2); half3 __ovld __cnfn sqrt(half3); half4 __ovld __cnfn sqrt(half4); half8 __ovld __cnfn sqrt(half8); half16 __ovld __cnfn sqrt(half16); #endif //cl_khr_fp16 /** * Compute tangent. */ float __ovld __cnfn tan(float); float2 __ovld __cnfn tan(float2); float3 __ovld __cnfn tan(float3); float4 __ovld __cnfn tan(float4); float8 __ovld __cnfn tan(float8); float16 __ovld __cnfn tan(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tan(double); double2 __ovld __cnfn tan(double2); double3 __ovld __cnfn tan(double3); double4 __ovld __cnfn tan(double4); double8 __ovld __cnfn tan(double8); double16 __ovld __cnfn tan(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tan(half); half2 __ovld __cnfn tan(half2); half3 __ovld __cnfn tan(half3); half4 __ovld __cnfn tan(half4); half8 __ovld __cnfn tan(half8); half16 __ovld __cnfn tan(half16); #endif //cl_khr_fp16 /** * Compute hyperbolic tangent. 
*/ float __ovld __cnfn tanh(float); float2 __ovld __cnfn tanh(float2); float3 __ovld __cnfn tanh(float3); float4 __ovld __cnfn tanh(float4); float8 __ovld __cnfn tanh(float8); float16 __ovld __cnfn tanh(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tanh(double); double2 __ovld __cnfn tanh(double2); double3 __ovld __cnfn tanh(double3); double4 __ovld __cnfn tanh(double4); double8 __ovld __cnfn tanh(double8); double16 __ovld __cnfn tanh(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tanh(half); half2 __ovld __cnfn tanh(half2); half3 __ovld __cnfn tanh(half3); half4 __ovld __cnfn tanh(half4); half8 __ovld __cnfn tanh(half8); half16 __ovld __cnfn tanh(half16); #endif //cl_khr_fp16 /** * Compute tan (PI * x). */ float __ovld __cnfn tanpi(float); float2 __ovld __cnfn tanpi(float2); float3 __ovld __cnfn tanpi(float3); float4 __ovld __cnfn tanpi(float4); float8 __ovld __cnfn tanpi(float8); float16 __ovld __cnfn tanpi(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tanpi(double); double2 __ovld __cnfn tanpi(double2); double3 __ovld __cnfn tanpi(double3); double4 __ovld __cnfn tanpi(double4); double8 __ovld __cnfn tanpi(double8); double16 __ovld __cnfn tanpi(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tanpi(half); half2 __ovld __cnfn tanpi(half2); half3 __ovld __cnfn tanpi(half3); half4 __ovld __cnfn tanpi(half4); half8 __ovld __cnfn tanpi(half8); half16 __ovld __cnfn tanpi(half16); #endif //cl_khr_fp16 /** * Compute the gamma function. 
*/ float __ovld __cnfn tgamma(float); float2 __ovld __cnfn tgamma(float2); float3 __ovld __cnfn tgamma(float3); float4 __ovld __cnfn tgamma(float4); float8 __ovld __cnfn tgamma(float8); float16 __ovld __cnfn tgamma(float16); #ifdef cl_khr_fp64 double __ovld __cnfn tgamma(double); double2 __ovld __cnfn tgamma(double2); double3 __ovld __cnfn tgamma(double3); double4 __ovld __cnfn tgamma(double4); double8 __ovld __cnfn tgamma(double8); double16 __ovld __cnfn tgamma(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn tgamma(half); half2 __ovld __cnfn tgamma(half2); half3 __ovld __cnfn tgamma(half3); half4 __ovld __cnfn tgamma(half4); half8 __ovld __cnfn tgamma(half8); half16 __ovld __cnfn tgamma(half16); #endif //cl_khr_fp16 /** * Round to integral value using the round to zero * rounding mode. */ float __ovld __cnfn trunc(float); float2 __ovld __cnfn trunc(float2); float3 __ovld __cnfn trunc(float3); float4 __ovld __cnfn trunc(float4); float8 __ovld __cnfn trunc(float8); float16 __ovld __cnfn trunc(float16); #ifdef cl_khr_fp64 double __ovld __cnfn trunc(double); double2 __ovld __cnfn trunc(double2); double3 __ovld __cnfn trunc(double3); double4 __ovld __cnfn trunc(double4); double8 __ovld __cnfn trunc(double8); double16 __ovld __cnfn trunc(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn trunc(half); half2 __ovld __cnfn trunc(half2); half3 __ovld __cnfn trunc(half3); half4 __ovld __cnfn trunc(half4); half8 __ovld __cnfn trunc(half8); half16 __ovld __cnfn trunc(half16); #endif //cl_khr_fp16 /** * Compute cosine. x must be in the range -2^16 ... +2^16. */ float __ovld __cnfn half_cos(float); float2 __ovld __cnfn half_cos(float2); float3 __ovld __cnfn half_cos(float3); float4 __ovld __cnfn half_cos(float4); float8 __ovld __cnfn half_cos(float8); float16 __ovld __cnfn half_cos(float16); /** * Compute x / y. 
*/ float __ovld __cnfn half_divide(float, float); float2 __ovld __cnfn half_divide(float2, float2); float3 __ovld __cnfn half_divide(float3, float3); float4 __ovld __cnfn half_divide(float4, float4); float8 __ovld __cnfn half_divide(float8, float8); float16 __ovld __cnfn half_divide(float16, float16); /** * Compute the base- e exponential of x. */ float __ovld __cnfn half_exp(float); float2 __ovld __cnfn half_exp(float2); float3 __ovld __cnfn half_exp(float3); float4 __ovld __cnfn half_exp(float4); float8 __ovld __cnfn half_exp(float8); float16 __ovld __cnfn half_exp(float16); /** * Compute the base- 2 exponential of x. */ float __ovld __cnfn half_exp2(float); float2 __ovld __cnfn half_exp2(float2); float3 __ovld __cnfn half_exp2(float3); float4 __ovld __cnfn half_exp2(float4); float8 __ovld __cnfn half_exp2(float8); float16 __ovld __cnfn half_exp2(float16); /** * Compute the base- 10 exponential of x. */ float __ovld __cnfn half_exp10(float); float2 __ovld __cnfn half_exp10(float2); float3 __ovld __cnfn half_exp10(float3); float4 __ovld __cnfn half_exp10(float4); float8 __ovld __cnfn half_exp10(float8); float16 __ovld __cnfn half_exp10(float16); /** * Compute natural logarithm. */ float __ovld __cnfn half_log(float); float2 __ovld __cnfn half_log(float2); float3 __ovld __cnfn half_log(float3); float4 __ovld __cnfn half_log(float4); float8 __ovld __cnfn half_log(float8); float16 __ovld __cnfn half_log(float16); /** * Compute a base 2 logarithm. */ float __ovld __cnfn half_log2(float); float2 __ovld __cnfn half_log2(float2); float3 __ovld __cnfn half_log2(float3); float4 __ovld __cnfn half_log2(float4); float8 __ovld __cnfn half_log2(float8); float16 __ovld __cnfn half_log2(float16); /** * Compute a base 10 logarithm. 
*/ float __ovld __cnfn half_log10(float); float2 __ovld __cnfn half_log10(float2); float3 __ovld __cnfn half_log10(float3); float4 __ovld __cnfn half_log10(float4); float8 __ovld __cnfn half_log10(float8); float16 __ovld __cnfn half_log10(float16); /** * Compute x to the power y, where x is >= 0. */ float __ovld __cnfn half_powr(float, float); float2 __ovld __cnfn half_powr(float2, float2); float3 __ovld __cnfn half_powr(float3, float3); float4 __ovld __cnfn half_powr(float4, float4); float8 __ovld __cnfn half_powr(float8, float8); float16 __ovld __cnfn half_powr(float16, float16); /** * Compute reciprocal. */ float __ovld __cnfn half_recip(float); float2 __ovld __cnfn half_recip(float2); float3 __ovld __cnfn half_recip(float3); float4 __ovld __cnfn half_recip(float4); float8 __ovld __cnfn half_recip(float8); float16 __ovld __cnfn half_recip(float16); /** * Compute inverse square root. */ float __ovld __cnfn half_rsqrt(float); float2 __ovld __cnfn half_rsqrt(float2); float3 __ovld __cnfn half_rsqrt(float3); float4 __ovld __cnfn half_rsqrt(float4); float8 __ovld __cnfn half_rsqrt(float8); float16 __ovld __cnfn half_rsqrt(float16); /** * Compute sine. x must be in the range -2^16 ... +2^16. */ float __ovld __cnfn half_sin(float); float2 __ovld __cnfn half_sin(float2); float3 __ovld __cnfn half_sin(float3); float4 __ovld __cnfn half_sin(float4); float8 __ovld __cnfn half_sin(float8); float16 __ovld __cnfn half_sin(float16); /** * Compute square root. */ float __ovld __cnfn half_sqrt(float); float2 __ovld __cnfn half_sqrt(float2); float3 __ovld __cnfn half_sqrt(float3); float4 __ovld __cnfn half_sqrt(float4); float8 __ovld __cnfn half_sqrt(float8); float16 __ovld __cnfn half_sqrt(float16); /** * Compute tangent. x must be in the range -216 ... +216. 
*/ float __ovld __cnfn half_tan(float); float2 __ovld __cnfn half_tan(float2); float3 __ovld __cnfn half_tan(float3); float4 __ovld __cnfn half_tan(float4); float8 __ovld __cnfn half_tan(float8); float16 __ovld __cnfn half_tan(float16); /** * Compute cosine over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_cos(float); float2 __ovld __cnfn native_cos(float2); float3 __ovld __cnfn native_cos(float3); float4 __ovld __cnfn native_cos(float4); float8 __ovld __cnfn native_cos(float8); float16 __ovld __cnfn native_cos(float16); /** * Compute x / y over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_divide(float, float); float2 __ovld __cnfn native_divide(float2, float2); float3 __ovld __cnfn native_divide(float3, float3); float4 __ovld __cnfn native_divide(float4, float4); float8 __ovld __cnfn native_divide(float8, float8); float16 __ovld __cnfn native_divide(float16, float16); /** * Compute the base- e exponential of x over an * implementation-defined range. The maximum error is * implementation-defined. */ float __ovld __cnfn native_exp(float); float2 __ovld __cnfn native_exp(float2); float3 __ovld __cnfn native_exp(float3); float4 __ovld __cnfn native_exp(float4); float8 __ovld __cnfn native_exp(float8); float16 __ovld __cnfn native_exp(float16); /** * Compute the base- 2 exponential of x over an * implementation-defined range. The maximum error is * implementation-defined. */ float __ovld __cnfn native_exp2(float); float2 __ovld __cnfn native_exp2(float2); float3 __ovld __cnfn native_exp2(float3); float4 __ovld __cnfn native_exp2(float4); float8 __ovld __cnfn native_exp2(float8); float16 __ovld __cnfn native_exp2(float16); /** * Compute the base- 10 exponential of x over an * implementation-defined range. The maximum error is * implementation-defined. 
*/ float __ovld __cnfn native_exp10(float); float2 __ovld __cnfn native_exp10(float2); float3 __ovld __cnfn native_exp10(float3); float4 __ovld __cnfn native_exp10(float4); float8 __ovld __cnfn native_exp10(float8); float16 __ovld __cnfn native_exp10(float16); /** * Compute natural logarithm over an implementationdefined * range. The maximum error is implementation * defined. */ float __ovld __cnfn native_log(float); float2 __ovld __cnfn native_log(float2); float3 __ovld __cnfn native_log(float3); float4 __ovld __cnfn native_log(float4); float8 __ovld __cnfn native_log(float8); float16 __ovld __cnfn native_log(float16); /** * Compute a base 2 logarithm over an implementationdefined * range. The maximum error is implementationdefined. */ float __ovld __cnfn native_log2(float); float2 __ovld __cnfn native_log2(float2); float3 __ovld __cnfn native_log2(float3); float4 __ovld __cnfn native_log2(float4); float8 __ovld __cnfn native_log2(float8); float16 __ovld __cnfn native_log2(float16); /** * Compute a base 10 logarithm over an implementationdefined * range. The maximum error is implementationdefined. */ float __ovld __cnfn native_log10(float); float2 __ovld __cnfn native_log10(float2); float3 __ovld __cnfn native_log10(float3); float4 __ovld __cnfn native_log10(float4); float8 __ovld __cnfn native_log10(float8); float16 __ovld __cnfn native_log10(float16); /** * Compute x to the power y, where x is >= 0. The range of * x and y are implementation-defined. The maximum error * is implementation-defined. */ float __ovld __cnfn native_powr(float, float); float2 __ovld __cnfn native_powr(float2, float2); float3 __ovld __cnfn native_powr(float3, float3); float4 __ovld __cnfn native_powr(float4, float4); float8 __ovld __cnfn native_powr(float8, float8); float16 __ovld __cnfn native_powr(float16, float16); /** * Compute reciprocal over an implementation-defined * range. The maximum error is implementation-defined. 
*/ float __ovld __cnfn native_recip(float); float2 __ovld __cnfn native_recip(float2); float3 __ovld __cnfn native_recip(float3); float4 __ovld __cnfn native_recip(float4); float8 __ovld __cnfn native_recip(float8); float16 __ovld __cnfn native_recip(float16); /** * Compute inverse square root over an implementationdefined * range. The maximum error is implementationdefined. */ float __ovld __cnfn native_rsqrt(float); float2 __ovld __cnfn native_rsqrt(float2); float3 __ovld __cnfn native_rsqrt(float3); float4 __ovld __cnfn native_rsqrt(float4); float8 __ovld __cnfn native_rsqrt(float8); float16 __ovld __cnfn native_rsqrt(float16); /** * Compute sine over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_sin(float); float2 __ovld __cnfn native_sin(float2); float3 __ovld __cnfn native_sin(float3); float4 __ovld __cnfn native_sin(float4); float8 __ovld __cnfn native_sin(float8); float16 __ovld __cnfn native_sin(float16); /** * Compute square root over an implementation-defined * range. The maximum error is implementation-defined. */ float __ovld __cnfn native_sqrt(float); float2 __ovld __cnfn native_sqrt(float2); float3 __ovld __cnfn native_sqrt(float3); float4 __ovld __cnfn native_sqrt(float4); float8 __ovld __cnfn native_sqrt(float8); float16 __ovld __cnfn native_sqrt(float16); /** * Compute tangent over an implementation-defined range. * The maximum error is implementation-defined. */ float __ovld __cnfn native_tan(float); float2 __ovld __cnfn native_tan(float2); float3 __ovld __cnfn native_tan(float3); float4 __ovld __cnfn native_tan(float4); float8 __ovld __cnfn native_tan(float8); float16 __ovld __cnfn native_tan(float16); // OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions /** * Returns | x |. 
*/ uchar __ovld __cnfn abs(char); uchar __ovld __cnfn abs(uchar); uchar2 __ovld __cnfn abs(char2); uchar2 __ovld __cnfn abs(uchar2); uchar3 __ovld __cnfn abs(char3); uchar3 __ovld __cnfn abs(uchar3); uchar4 __ovld __cnfn abs(char4); uchar4 __ovld __cnfn abs(uchar4); uchar8 __ovld __cnfn abs(char8); uchar8 __ovld __cnfn abs(uchar8); uchar16 __ovld __cnfn abs(char16); uchar16 __ovld __cnfn abs(uchar16); ushort __ovld __cnfn abs(short); ushort __ovld __cnfn abs(ushort); ushort2 __ovld __cnfn abs(short2); ushort2 __ovld __cnfn abs(ushort2); ushort3 __ovld __cnfn abs(short3); ushort3 __ovld __cnfn abs(ushort3); ushort4 __ovld __cnfn abs(short4); ushort4 __ovld __cnfn abs(ushort4); ushort8 __ovld __cnfn abs(short8); ushort8 __ovld __cnfn abs(ushort8); ushort16 __ovld __cnfn abs(short16); ushort16 __ovld __cnfn abs(ushort16); uint __ovld __cnfn abs(int); uint __ovld __cnfn abs(uint); uint2 __ovld __cnfn abs(int2); uint2 __ovld __cnfn abs(uint2); uint3 __ovld __cnfn abs(int3); uint3 __ovld __cnfn abs(uint3); uint4 __ovld __cnfn abs(int4); uint4 __ovld __cnfn abs(uint4); uint8 __ovld __cnfn abs(int8); uint8 __ovld __cnfn abs(uint8); uint16 __ovld __cnfn abs(int16); uint16 __ovld __cnfn abs(uint16); ulong __ovld __cnfn abs(long); ulong __ovld __cnfn abs(ulong); ulong2 __ovld __cnfn abs(long2); ulong2 __ovld __cnfn abs(ulong2); ulong3 __ovld __cnfn abs(long3); ulong3 __ovld __cnfn abs(ulong3); ulong4 __ovld __cnfn abs(long4); ulong4 __ovld __cnfn abs(ulong4); ulong8 __ovld __cnfn abs(long8); ulong8 __ovld __cnfn abs(ulong8); ulong16 __ovld __cnfn abs(long16); ulong16 __ovld __cnfn abs(ulong16); /** * Returns | x - y | without modulo overflow. 
*/ uchar __ovld __cnfn abs_diff(char, char); uchar __ovld __cnfn abs_diff(uchar, uchar); uchar2 __ovld __cnfn abs_diff(char2, char2); uchar2 __ovld __cnfn abs_diff(uchar2, uchar2); uchar3 __ovld __cnfn abs_diff(char3, char3); uchar3 __ovld __cnfn abs_diff(uchar3, uchar3); uchar4 __ovld __cnfn abs_diff(char4, char4); uchar4 __ovld __cnfn abs_diff(uchar4, uchar4); uchar8 __ovld __cnfn abs_diff(char8, char8); uchar8 __ovld __cnfn abs_diff(uchar8, uchar8); uchar16 __ovld __cnfn abs_diff(char16, char16); uchar16 __ovld __cnfn abs_diff(uchar16, uchar16); ushort __ovld __cnfn abs_diff(short, short); ushort __ovld __cnfn abs_diff(ushort, ushort); ushort2 __ovld __cnfn abs_diff(short2, short2); ushort2 __ovld __cnfn abs_diff(ushort2, ushort2); ushort3 __ovld __cnfn abs_diff(short3, short3); ushort3 __ovld __cnfn abs_diff(ushort3, ushort3); ushort4 __ovld __cnfn abs_diff(short4, short4); ushort4 __ovld __cnfn abs_diff(ushort4, ushort4); ushort8 __ovld __cnfn abs_diff(short8, short8); ushort8 __ovld __cnfn abs_diff(ushort8, ushort8); ushort16 __ovld __cnfn abs_diff(short16, short16); ushort16 __ovld __cnfn abs_diff(ushort16, ushort16); uint __ovld __cnfn abs_diff(int, int); uint __ovld __cnfn abs_diff(uint, uint); uint2 __ovld __cnfn abs_diff(int2, int2); uint2 __ovld __cnfn abs_diff(uint2, uint2); uint3 __ovld __cnfn abs_diff(int3, int3); uint3 __ovld __cnfn abs_diff(uint3, uint3); uint4 __ovld __cnfn abs_diff(int4, int4); uint4 __ovld __cnfn abs_diff(uint4, uint4); uint8 __ovld __cnfn abs_diff(int8, int8); uint8 __ovld __cnfn abs_diff(uint8, uint8); uint16 __ovld __cnfn abs_diff(int16, int16); uint16 __ovld __cnfn abs_diff(uint16, uint16); ulong __ovld __cnfn abs_diff(long, long); ulong __ovld __cnfn abs_diff(ulong, ulong); ulong2 __ovld __cnfn abs_diff(long2, long2); ulong2 __ovld __cnfn abs_diff(ulong2, ulong2); ulong3 __ovld __cnfn abs_diff(long3, long3); ulong3 __ovld __cnfn abs_diff(ulong3, ulong3); ulong4 __ovld __cnfn abs_diff(long4, long4); ulong4 __ovld __cnfn 
abs_diff(ulong4, ulong4); ulong8 __ovld __cnfn abs_diff(long8, long8); ulong8 __ovld __cnfn abs_diff(ulong8, ulong8); ulong16 __ovld __cnfn abs_diff(long16, long16); ulong16 __ovld __cnfn abs_diff(ulong16, ulong16); /** * Returns x + y and saturates the result. */ char __ovld __cnfn add_sat(char, char); uchar __ovld __cnfn add_sat(uchar, uchar); char2 __ovld __cnfn add_sat(char2, char2); uchar2 __ovld __cnfn add_sat(uchar2, uchar2); char3 __ovld __cnfn add_sat(char3, char3); uchar3 __ovld __cnfn add_sat(uchar3, uchar3); char4 __ovld __cnfn add_sat(char4, char4); uchar4 __ovld __cnfn add_sat(uchar4, uchar4); char8 __ovld __cnfn add_sat(char8, char8); uchar8 __ovld __cnfn add_sat(uchar8, uchar8); char16 __ovld __cnfn add_sat(char16, char16); uchar16 __ovld __cnfn add_sat(uchar16, uchar16); short __ovld __cnfn add_sat(short, short); ushort __ovld __cnfn add_sat(ushort, ushort); short2 __ovld __cnfn add_sat(short2, short2); ushort2 __ovld __cnfn add_sat(ushort2, ushort2); short3 __ovld __cnfn add_sat(short3, short3); ushort3 __ovld __cnfn add_sat(ushort3, ushort3); short4 __ovld __cnfn add_sat(short4, short4); ushort4 __ovld __cnfn add_sat(ushort4, ushort4); short8 __ovld __cnfn add_sat(short8, short8); ushort8 __ovld __cnfn add_sat(ushort8, ushort8); short16 __ovld __cnfn add_sat(short16, short16); ushort16 __ovld __cnfn add_sat(ushort16, ushort16); int __ovld __cnfn add_sat(int, int); uint __ovld __cnfn add_sat(uint, uint); int2 __ovld __cnfn add_sat(int2, int2); uint2 __ovld __cnfn add_sat(uint2, uint2); int3 __ovld __cnfn add_sat(int3, int3); uint3 __ovld __cnfn add_sat(uint3, uint3); int4 __ovld __cnfn add_sat(int4, int4); uint4 __ovld __cnfn add_sat(uint4, uint4); int8 __ovld __cnfn add_sat(int8, int8); uint8 __ovld __cnfn add_sat(uint8, uint8); int16 __ovld __cnfn add_sat(int16, int16); uint16 __ovld __cnfn add_sat(uint16, uint16); long __ovld __cnfn add_sat(long, long); ulong __ovld __cnfn add_sat(ulong, ulong); long2 __ovld __cnfn add_sat(long2, long2); ulong2 
__ovld __cnfn add_sat(ulong2, ulong2); long3 __ovld __cnfn add_sat(long3, long3); ulong3 __ovld __cnfn add_sat(ulong3, ulong3); long4 __ovld __cnfn add_sat(long4, long4); ulong4 __ovld __cnfn add_sat(ulong4, ulong4); long8 __ovld __cnfn add_sat(long8, long8); ulong8 __ovld __cnfn add_sat(ulong8, ulong8); long16 __ovld __cnfn add_sat(long16, long16); ulong16 __ovld __cnfn add_sat(ulong16, ulong16); /** * Returns (x + y) >> 1. The intermediate sum does * not modulo overflow. */ char __ovld __cnfn hadd(char, char); uchar __ovld __cnfn hadd(uchar, uchar); char2 __ovld __cnfn hadd(char2, char2); uchar2 __ovld __cnfn hadd(uchar2, uchar2); char3 __ovld __cnfn hadd(char3, char3); uchar3 __ovld __cnfn hadd(uchar3, uchar3); char4 __ovld __cnfn hadd(char4, char4); uchar4 __ovld __cnfn hadd(uchar4, uchar4); char8 __ovld __cnfn hadd(char8, char8); uchar8 __ovld __cnfn hadd(uchar8, uchar8); char16 __ovld __cnfn hadd(char16, char16); uchar16 __ovld __cnfn hadd(uchar16, uchar16); short __ovld __cnfn hadd(short, short); ushort __ovld __cnfn hadd(ushort, ushort); short2 __ovld __cnfn hadd(short2, short2); ushort2 __ovld __cnfn hadd(ushort2, ushort2); short3 __ovld __cnfn hadd(short3, short3); ushort3 __ovld __cnfn hadd(ushort3, ushort3); short4 __ovld __cnfn hadd(short4, short4); ushort4 __ovld __cnfn hadd(ushort4, ushort4); short8 __ovld __cnfn hadd(short8, short8); ushort8 __ovld __cnfn hadd(ushort8, ushort8); short16 __ovld __cnfn hadd(short16, short16); ushort16 __ovld __cnfn hadd(ushort16, ushort16); int __ovld __cnfn hadd(int, int); uint __ovld __cnfn hadd(uint, uint); int2 __ovld __cnfn hadd(int2, int2); uint2 __ovld __cnfn hadd(uint2, uint2); int3 __ovld __cnfn hadd(int3, int3); uint3 __ovld __cnfn hadd(uint3, uint3); int4 __ovld __cnfn hadd(int4, int4); uint4 __ovld __cnfn hadd(uint4, uint4); int8 __ovld __cnfn hadd(int8, int8); uint8 __ovld __cnfn hadd(uint8, uint8); int16 __ovld __cnfn hadd(int16, int16); uint16 __ovld __cnfn hadd(uint16, uint16); long __ovld __cnfn 
hadd(long, long); ulong __ovld __cnfn hadd(ulong, ulong); long2 __ovld __cnfn hadd(long2, long2); ulong2 __ovld __cnfn hadd(ulong2, ulong2); long3 __ovld __cnfn hadd(long3, long3); ulong3 __ovld __cnfn hadd(ulong3, ulong3); long4 __ovld __cnfn hadd(long4, long4); ulong4 __ovld __cnfn hadd(ulong4, ulong4); long8 __ovld __cnfn hadd(long8, long8); ulong8 __ovld __cnfn hadd(ulong8, ulong8); long16 __ovld __cnfn hadd(long16, long16); ulong16 __ovld __cnfn hadd(ulong16, ulong16); /** * Returns (x + y + 1) >> 1. The intermediate sum * does not modulo overflow. */ char __ovld __cnfn rhadd(char, char); uchar __ovld __cnfn rhadd(uchar, uchar); char2 __ovld __cnfn rhadd(char2, char2); uchar2 __ovld __cnfn rhadd(uchar2, uchar2); char3 __ovld __cnfn rhadd(char3, char3); uchar3 __ovld __cnfn rhadd(uchar3, uchar3); char4 __ovld __cnfn rhadd(char4, char4); uchar4 __ovld __cnfn rhadd(uchar4, uchar4); char8 __ovld __cnfn rhadd(char8, char8); uchar8 __ovld __cnfn rhadd(uchar8, uchar8); char16 __ovld __cnfn rhadd(char16, char16); uchar16 __ovld __cnfn rhadd(uchar16, uchar16); short __ovld __cnfn rhadd(short, short); ushort __ovld __cnfn rhadd(ushort, ushort); short2 __ovld __cnfn rhadd(short2, short2); ushort2 __ovld __cnfn rhadd(ushort2, ushort2); short3 __ovld __cnfn rhadd(short3, short3); ushort3 __ovld __cnfn rhadd(ushort3, ushort3); short4 __ovld __cnfn rhadd(short4, short4); ushort4 __ovld __cnfn rhadd(ushort4, ushort4); short8 __ovld __cnfn rhadd(short8, short8); ushort8 __ovld __cnfn rhadd(ushort8, ushort8); short16 __ovld __cnfn rhadd(short16, short16); ushort16 __ovld __cnfn rhadd(ushort16, ushort16); int __ovld __cnfn rhadd(int, int); uint __ovld __cnfn rhadd(uint, uint); int2 __ovld __cnfn rhadd(int2, int2); uint2 __ovld __cnfn rhadd(uint2, uint2); int3 __ovld __cnfn rhadd(int3, int3); uint3 __ovld __cnfn rhadd(uint3, uint3); int4 __ovld __cnfn rhadd(int4, int4); uint4 __ovld __cnfn rhadd(uint4, uint4); int8 __ovld __cnfn rhadd(int8, int8); uint8 __ovld __cnfn rhadd(uint8, 
uint8); int16 __ovld __cnfn rhadd(int16, int16); uint16 __ovld __cnfn rhadd(uint16, uint16); long __ovld __cnfn rhadd(long, long); ulong __ovld __cnfn rhadd(ulong, ulong); long2 __ovld __cnfn rhadd(long2, long2); ulong2 __ovld __cnfn rhadd(ulong2, ulong2); long3 __ovld __cnfn rhadd(long3, long3); ulong3 __ovld __cnfn rhadd(ulong3, ulong3); long4 __ovld __cnfn rhadd(long4, long4); ulong4 __ovld __cnfn rhadd(ulong4, ulong4); long8 __ovld __cnfn rhadd(long8, long8); ulong8 __ovld __cnfn rhadd(ulong8, ulong8); long16 __ovld __cnfn rhadd(long16, long16); ulong16 __ovld __cnfn rhadd(ulong16, ulong16); /** * Returns min(max(x, minval), maxval). * Results are undefined if minval > maxval. */ char __ovld __cnfn clamp(char, char, char); uchar __ovld __cnfn clamp(uchar, uchar, uchar); char2 __ovld __cnfn clamp(char2, char2, char2); uchar2 __ovld __cnfn clamp(uchar2, uchar2, uchar2); char3 __ovld __cnfn clamp(char3, char3, char3); uchar3 __ovld __cnfn clamp(uchar3, uchar3, uchar3); char4 __ovld __cnfn clamp(char4, char4, char4); uchar4 __ovld __cnfn clamp(uchar4, uchar4, uchar4); char8 __ovld __cnfn clamp(char8, char8, char8); uchar8 __ovld __cnfn clamp(uchar8, uchar8, uchar8); char16 __ovld __cnfn clamp(char16, char16, char16); uchar16 __ovld __cnfn clamp(uchar16, uchar16, uchar16); short __ovld __cnfn clamp(short, short, short); ushort __ovld __cnfn clamp(ushort, ushort, ushort); short2 __ovld __cnfn clamp(short2, short2, short2); ushort2 __ovld __cnfn clamp(ushort2, ushort2, ushort2); short3 __ovld __cnfn clamp(short3, short3, short3); ushort3 __ovld __cnfn clamp(ushort3, ushort3, ushort3); short4 __ovld __cnfn clamp(short4, short4, short4); ushort4 __ovld __cnfn clamp(ushort4, ushort4, ushort4); short8 __ovld __cnfn clamp(short8, short8, short8); ushort8 __ovld __cnfn clamp(ushort8, ushort8, ushort8); short16 __ovld __cnfn clamp(short16, short16, short16); ushort16 __ovld __cnfn clamp(ushort16, ushort16, ushort16); int __ovld __cnfn clamp(int, int, int); uint __ovld __cnfn 
clamp(uint, uint, uint); int2 __ovld __cnfn clamp(int2, int2, int2); uint2 __ovld __cnfn clamp(uint2, uint2, uint2); int3 __ovld __cnfn clamp(int3, int3, int3); uint3 __ovld __cnfn clamp(uint3, uint3, uint3); int4 __ovld __cnfn clamp(int4, int4, int4); uint4 __ovld __cnfn clamp(uint4, uint4, uint4); int8 __ovld __cnfn clamp(int8, int8, int8); uint8 __ovld __cnfn clamp(uint8, uint8, uint8); int16 __ovld __cnfn clamp(int16, int16, int16); uint16 __ovld __cnfn clamp(uint16, uint16, uint16); long __ovld __cnfn clamp(long, long, long); ulong __ovld __cnfn clamp(ulong, ulong, ulong); long2 __ovld __cnfn clamp(long2, long2, long2); ulong2 __ovld __cnfn clamp(ulong2, ulong2, ulong2); long3 __ovld __cnfn clamp(long3, long3, long3); ulong3 __ovld __cnfn clamp(ulong3, ulong3, ulong3); long4 __ovld __cnfn clamp(long4, long4, long4); ulong4 __ovld __cnfn clamp(ulong4, ulong4, ulong4); long8 __ovld __cnfn clamp(long8, long8, long8); ulong8 __ovld __cnfn clamp(ulong8, ulong8, ulong8); long16 __ovld __cnfn clamp(long16, long16, long16); ulong16 __ovld __cnfn clamp(ulong16, ulong16, ulong16); char2 __ovld __cnfn clamp(char2, char, char); uchar2 __ovld __cnfn clamp(uchar2, uchar, uchar); char3 __ovld __cnfn clamp(char3, char, char); uchar3 __ovld __cnfn clamp(uchar3, uchar, uchar); char4 __ovld __cnfn clamp(char4, char, char); uchar4 __ovld __cnfn clamp(uchar4, uchar, uchar); char8 __ovld __cnfn clamp(char8, char, char); uchar8 __ovld __cnfn clamp(uchar8, uchar, uchar); char16 __ovld __cnfn clamp(char16, char, char); uchar16 __ovld __cnfn clamp(uchar16, uchar, uchar); short2 __ovld __cnfn clamp(short2, short, short); ushort2 __ovld __cnfn clamp(ushort2, ushort, ushort); short3 __ovld __cnfn clamp(short3, short, short); ushort3 __ovld __cnfn clamp(ushort3, ushort, ushort); short4 __ovld __cnfn clamp(short4, short, short); ushort4 __ovld __cnfn clamp(ushort4, ushort, ushort); short8 __ovld __cnfn clamp(short8, short, short); ushort8 __ovld __cnfn clamp(ushort8, ushort, ushort); 
short16 __ovld __cnfn clamp(short16, short, short); ushort16 __ovld __cnfn clamp(ushort16, ushort, ushort); int2 __ovld __cnfn clamp(int2, int, int); uint2 __ovld __cnfn clamp(uint2, uint, uint); int3 __ovld __cnfn clamp(int3, int, int); uint3 __ovld __cnfn clamp(uint3, uint, uint); int4 __ovld __cnfn clamp(int4, int, int); uint4 __ovld __cnfn clamp(uint4, uint, uint); int8 __ovld __cnfn clamp(int8, int, int); uint8 __ovld __cnfn clamp(uint8, uint, uint); int16 __ovld __cnfn clamp(int16, int, int); uint16 __ovld __cnfn clamp(uint16, uint, uint); long2 __ovld __cnfn clamp(long2, long, long); ulong2 __ovld __cnfn clamp(ulong2, ulong, ulong); long3 __ovld __cnfn clamp(long3, long, long); ulong3 __ovld __cnfn clamp(ulong3, ulong, ulong); long4 __ovld __cnfn clamp(long4, long, long); ulong4 __ovld __cnfn clamp(ulong4, ulong, ulong); long8 __ovld __cnfn clamp(long8, long, long); ulong8 __ovld __cnfn clamp(ulong8, ulong, ulong); long16 __ovld __cnfn clamp(long16, long, long); ulong16 __ovld __cnfn clamp(ulong16, ulong, ulong); /** * Returns the number of leading 0-bits in x, starting * at the most significant bit position. 
*/ char __ovld __cnfn clz(char); uchar __ovld __cnfn clz(uchar); char2 __ovld __cnfn clz(char2); uchar2 __ovld __cnfn clz(uchar2); char3 __ovld __cnfn clz(char3); uchar3 __ovld __cnfn clz(uchar3); char4 __ovld __cnfn clz(char4); uchar4 __ovld __cnfn clz(uchar4); char8 __ovld __cnfn clz(char8); uchar8 __ovld __cnfn clz(uchar8); char16 __ovld __cnfn clz(char16); uchar16 __ovld __cnfn clz(uchar16); short __ovld __cnfn clz(short); ushort __ovld __cnfn clz(ushort); short2 __ovld __cnfn clz(short2); ushort2 __ovld __cnfn clz(ushort2); short3 __ovld __cnfn clz(short3); ushort3 __ovld __cnfn clz(ushort3); short4 __ovld __cnfn clz(short4); ushort4 __ovld __cnfn clz(ushort4); short8 __ovld __cnfn clz(short8); ushort8 __ovld __cnfn clz(ushort8); short16 __ovld __cnfn clz(short16); ushort16 __ovld __cnfn clz(ushort16); int __ovld __cnfn clz(int); uint __ovld __cnfn clz(uint); int2 __ovld __cnfn clz(int2); uint2 __ovld __cnfn clz(uint2); int3 __ovld __cnfn clz(int3); uint3 __ovld __cnfn clz(uint3); int4 __ovld __cnfn clz(int4); uint4 __ovld __cnfn clz(uint4); int8 __ovld __cnfn clz(int8); uint8 __ovld __cnfn clz(uint8); int16 __ovld __cnfn clz(int16); uint16 __ovld __cnfn clz(uint16); long __ovld __cnfn clz(long); ulong __ovld __cnfn clz(ulong); long2 __ovld __cnfn clz(long2); ulong2 __ovld __cnfn clz(ulong2); long3 __ovld __cnfn clz(long3); ulong3 __ovld __cnfn clz(ulong3); long4 __ovld __cnfn clz(long4); ulong4 __ovld __cnfn clz(ulong4); long8 __ovld __cnfn clz(long8); ulong8 __ovld __cnfn clz(ulong8); long16 __ovld __cnfn clz(long16); ulong16 __ovld __cnfn clz(ulong16); /** * Returns the count of trailing 0-bits in x. If x is 0, * returns the size in bits of the type of x or * component type of x, if x is a vector. 
*/ #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) char __ovld __cnfn ctz(char); uchar __ovld __cnfn ctz(uchar); char2 __ovld __cnfn ctz(char2); uchar2 __ovld __cnfn ctz(uchar2); char3 __ovld __cnfn ctz(char3); uchar3 __ovld __cnfn ctz(uchar3); char4 __ovld __cnfn ctz(char4); uchar4 __ovld __cnfn ctz(uchar4); char8 __ovld __cnfn ctz(char8); uchar8 __ovld __cnfn ctz(uchar8); char16 __ovld __cnfn ctz(char16); uchar16 __ovld __cnfn ctz(uchar16); short __ovld __cnfn ctz(short); ushort __ovld __cnfn ctz(ushort); short2 __ovld __cnfn ctz(short2); ushort2 __ovld __cnfn ctz(ushort2); short3 __ovld __cnfn ctz(short3); ushort3 __ovld __cnfn ctz(ushort3); short4 __ovld __cnfn ctz(short4); ushort4 __ovld __cnfn ctz(ushort4); short8 __ovld __cnfn ctz(short8); ushort8 __ovld __cnfn ctz(ushort8); short16 __ovld __cnfn ctz(short16); ushort16 __ovld __cnfn ctz(ushort16); int __ovld __cnfn ctz(int); uint __ovld __cnfn ctz(uint); int2 __ovld __cnfn ctz(int2); uint2 __ovld __cnfn ctz(uint2); int3 __ovld __cnfn ctz(int3); uint3 __ovld __cnfn ctz(uint3); int4 __ovld __cnfn ctz(int4); uint4 __ovld __cnfn ctz(uint4); int8 __ovld __cnfn ctz(int8); uint8 __ovld __cnfn ctz(uint8); int16 __ovld __cnfn ctz(int16); uint16 __ovld __cnfn ctz(uint16); long __ovld __cnfn ctz(long); ulong __ovld __cnfn ctz(ulong); long2 __ovld __cnfn ctz(long2); ulong2 __ovld __cnfn ctz(ulong2); long3 __ovld __cnfn ctz(long3); ulong3 __ovld __cnfn ctz(ulong3); long4 __ovld __cnfn ctz(long4); ulong4 __ovld __cnfn ctz(ulong4); long8 __ovld __cnfn ctz(long8); ulong8 __ovld __cnfn ctz(ulong8); long16 __ovld __cnfn ctz(long16); ulong16 __ovld __cnfn ctz(ulong16); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Returns mul_hi(a, b) + c. 
*/ char __ovld __cnfn mad_hi(char, char, char); uchar __ovld __cnfn mad_hi(uchar, uchar, uchar); char2 __ovld __cnfn mad_hi(char2, char2, char2); uchar2 __ovld __cnfn mad_hi(uchar2, uchar2, uchar2); char3 __ovld __cnfn mad_hi(char3, char3, char3); uchar3 __ovld __cnfn mad_hi(uchar3, uchar3, uchar3); char4 __ovld __cnfn mad_hi(char4, char4, char4); uchar4 __ovld __cnfn mad_hi(uchar4, uchar4, uchar4); char8 __ovld __cnfn mad_hi(char8, char8, char8); uchar8 __ovld __cnfn mad_hi(uchar8, uchar8, uchar8); char16 __ovld __cnfn mad_hi(char16, char16, char16); uchar16 __ovld __cnfn mad_hi(uchar16, uchar16, uchar16); short __ovld __cnfn mad_hi(short, short, short); ushort __ovld __cnfn mad_hi(ushort, ushort, ushort); short2 __ovld __cnfn mad_hi(short2, short2, short2); ushort2 __ovld __cnfn mad_hi(ushort2, ushort2, ushort2); short3 __ovld __cnfn mad_hi(short3, short3, short3); ushort3 __ovld __cnfn mad_hi(ushort3, ushort3, ushort3); short4 __ovld __cnfn mad_hi(short4, short4, short4); ushort4 __ovld __cnfn mad_hi(ushort4, ushort4, ushort4); short8 __ovld __cnfn mad_hi(short8, short8, short8); ushort8 __ovld __cnfn mad_hi(ushort8, ushort8, ushort8); short16 __ovld __cnfn mad_hi(short16, short16, short16); ushort16 __ovld __cnfn mad_hi(ushort16, ushort16, ushort16); int __ovld __cnfn mad_hi(int, int, int); uint __ovld __cnfn mad_hi(uint, uint, uint); int2 __ovld __cnfn mad_hi(int2, int2, int2); uint2 __ovld __cnfn mad_hi(uint2, uint2, uint2); int3 __ovld __cnfn mad_hi(int3, int3, int3); uint3 __ovld __cnfn mad_hi(uint3, uint3, uint3); int4 __ovld __cnfn mad_hi(int4, int4, int4); uint4 __ovld __cnfn mad_hi(uint4, uint4, uint4); int8 __ovld __cnfn mad_hi(int8, int8, int8); uint8 __ovld __cnfn mad_hi(uint8, uint8, uint8); int16 __ovld __cnfn mad_hi(int16, int16, int16); uint16 __ovld __cnfn mad_hi(uint16, uint16, uint16); long __ovld __cnfn mad_hi(long, long, long); ulong __ovld __cnfn mad_hi(ulong, ulong, ulong); long2 __ovld __cnfn mad_hi(long2, long2, long2); ulong2 __ovld 
__cnfn mad_hi(ulong2, ulong2, ulong2); long3 __ovld __cnfn mad_hi(long3, long3, long3); ulong3 __ovld __cnfn mad_hi(ulong3, ulong3, ulong3); long4 __ovld __cnfn mad_hi(long4, long4, long4); ulong4 __ovld __cnfn mad_hi(ulong4, ulong4, ulong4); long8 __ovld __cnfn mad_hi(long8, long8, long8); ulong8 __ovld __cnfn mad_hi(ulong8, ulong8, ulong8); long16 __ovld __cnfn mad_hi(long16, long16, long16); ulong16 __ovld __cnfn mad_hi(ulong16, ulong16, ulong16); /** * Returns a * b + c and saturates the result. */ char __ovld __cnfn mad_sat(char, char, char); uchar __ovld __cnfn mad_sat(uchar, uchar, uchar); char2 __ovld __cnfn mad_sat(char2, char2, char2); uchar2 __ovld __cnfn mad_sat(uchar2, uchar2, uchar2); char3 __ovld __cnfn mad_sat(char3, char3, char3); uchar3 __ovld __cnfn mad_sat(uchar3, uchar3, uchar3); char4 __ovld __cnfn mad_sat(char4, char4, char4); uchar4 __ovld __cnfn mad_sat(uchar4, uchar4, uchar4); char8 __ovld __cnfn mad_sat(char8, char8, char8); uchar8 __ovld __cnfn mad_sat(uchar8, uchar8, uchar8); char16 __ovld __cnfn mad_sat(char16, char16, char16); uchar16 __ovld __cnfn mad_sat(uchar16, uchar16, uchar16); short __ovld __cnfn mad_sat(short, short, short); ushort __ovld __cnfn mad_sat(ushort, ushort, ushort); short2 __ovld __cnfn mad_sat(short2, short2, short2); ushort2 __ovld __cnfn mad_sat(ushort2, ushort2, ushort2); short3 __ovld __cnfn mad_sat(short3, short3, short3); ushort3 __ovld __cnfn mad_sat(ushort3, ushort3, ushort3); short4 __ovld __cnfn mad_sat(short4, short4, short4); ushort4 __ovld __cnfn mad_sat(ushort4, ushort4, ushort4); short8 __ovld __cnfn mad_sat(short8, short8, short8); ushort8 __ovld __cnfn mad_sat(ushort8, ushort8, ushort8); short16 __ovld __cnfn mad_sat(short16, short16, short16); ushort16 __ovld __cnfn mad_sat(ushort16, ushort16, ushort16); int __ovld __cnfn mad_sat(int, int, int); uint __ovld __cnfn mad_sat(uint, uint, uint); int2 __ovld __cnfn mad_sat(int2, int2, int2); uint2 __ovld __cnfn mad_sat(uint2, uint2, uint2); int3 __ovld 
__cnfn mad_sat(int3, int3, int3); uint3 __ovld __cnfn mad_sat(uint3, uint3, uint3); int4 __ovld __cnfn mad_sat(int4, int4, int4); uint4 __ovld __cnfn mad_sat(uint4, uint4, uint4); int8 __ovld __cnfn mad_sat(int8, int8, int8); uint8 __ovld __cnfn mad_sat(uint8, uint8, uint8); int16 __ovld __cnfn mad_sat(int16, int16, int16); uint16 __ovld __cnfn mad_sat(uint16, uint16, uint16); long __ovld __cnfn mad_sat(long, long, long); ulong __ovld __cnfn mad_sat(ulong, ulong, ulong); long2 __ovld __cnfn mad_sat(long2, long2, long2); ulong2 __ovld __cnfn mad_sat(ulong2, ulong2, ulong2); long3 __ovld __cnfn mad_sat(long3, long3, long3); ulong3 __ovld __cnfn mad_sat(ulong3, ulong3, ulong3); long4 __ovld __cnfn mad_sat(long4, long4, long4); ulong4 __ovld __cnfn mad_sat(ulong4, ulong4, ulong4); long8 __ovld __cnfn mad_sat(long8, long8, long8); ulong8 __ovld __cnfn mad_sat(ulong8, ulong8, ulong8); long16 __ovld __cnfn mad_sat(long16, long16, long16); ulong16 __ovld __cnfn mad_sat(ulong16, ulong16, ulong16); /** * Returns y if x < y, otherwise it returns x. 
*/ char __ovld __cnfn max(char, char); uchar __ovld __cnfn max(uchar, uchar); char2 __ovld __cnfn max(char2, char2); uchar2 __ovld __cnfn max(uchar2, uchar2); char3 __ovld __cnfn max(char3, char3); uchar3 __ovld __cnfn max(uchar3, uchar3); char4 __ovld __cnfn max(char4, char4); uchar4 __ovld __cnfn max(uchar4, uchar4); char8 __ovld __cnfn max(char8, char8); uchar8 __ovld __cnfn max(uchar8, uchar8); char16 __ovld __cnfn max(char16, char16); uchar16 __ovld __cnfn max(uchar16, uchar16); short __ovld __cnfn max(short, short); ushort __ovld __cnfn max(ushort, ushort); short2 __ovld __cnfn max(short2, short2); ushort2 __ovld __cnfn max(ushort2, ushort2); short3 __ovld __cnfn max(short3, short3); ushort3 __ovld __cnfn max(ushort3, ushort3); short4 __ovld __cnfn max(short4, short4); ushort4 __ovld __cnfn max(ushort4, ushort4); short8 __ovld __cnfn max(short8, short8); ushort8 __ovld __cnfn max(ushort8, ushort8); short16 __ovld __cnfn max(short16, short16); ushort16 __ovld __cnfn max(ushort16, ushort16); int __ovld __cnfn max(int, int); uint __ovld __cnfn max(uint, uint); int2 __ovld __cnfn max(int2, int2); uint2 __ovld __cnfn max(uint2, uint2); int3 __ovld __cnfn max(int3, int3); uint3 __ovld __cnfn max(uint3, uint3); int4 __ovld __cnfn max(int4, int4); uint4 __ovld __cnfn max(uint4, uint4); int8 __ovld __cnfn max(int8, int8); uint8 __ovld __cnfn max(uint8, uint8); int16 __ovld __cnfn max(int16, int16); uint16 __ovld __cnfn max(uint16, uint16); long __ovld __cnfn max(long, long); ulong __ovld __cnfn max(ulong, ulong); long2 __ovld __cnfn max(long2, long2); ulong2 __ovld __cnfn max(ulong2, ulong2); long3 __ovld __cnfn max(long3, long3); ulong3 __ovld __cnfn max(ulong3, ulong3); long4 __ovld __cnfn max(long4, long4); ulong4 __ovld __cnfn max(ulong4, ulong4); long8 __ovld __cnfn max(long8, long8); ulong8 __ovld __cnfn max(ulong8, ulong8); long16 __ovld __cnfn max(long16, long16); ulong16 __ovld __cnfn max(ulong16, ulong16); char2 __ovld __cnfn max(char2, char); uchar2 __ovld 
__cnfn max(uchar2, uchar); char3 __ovld __cnfn max(char3, char); uchar3 __ovld __cnfn max(uchar3, uchar); char4 __ovld __cnfn max(char4, char); uchar4 __ovld __cnfn max(uchar4, uchar); char8 __ovld __cnfn max(char8, char); uchar8 __ovld __cnfn max(uchar8, uchar); char16 __ovld __cnfn max(char16, char); uchar16 __ovld __cnfn max(uchar16, uchar); short2 __ovld __cnfn max(short2, short); ushort2 __ovld __cnfn max(ushort2, ushort); short3 __ovld __cnfn max(short3, short); ushort3 __ovld __cnfn max(ushort3, ushort); short4 __ovld __cnfn max(short4, short); ushort4 __ovld __cnfn max(ushort4, ushort); short8 __ovld __cnfn max(short8, short); ushort8 __ovld __cnfn max(ushort8, ushort); short16 __ovld __cnfn max(short16, short); ushort16 __ovld __cnfn max(ushort16, ushort); int2 __ovld __cnfn max(int2, int); uint2 __ovld __cnfn max(uint2, uint); int3 __ovld __cnfn max(int3, int); uint3 __ovld __cnfn max(uint3, uint); int4 __ovld __cnfn max(int4, int); uint4 __ovld __cnfn max(uint4, uint); int8 __ovld __cnfn max(int8, int); uint8 __ovld __cnfn max(uint8, uint); int16 __ovld __cnfn max(int16, int); uint16 __ovld __cnfn max(uint16, uint); long2 __ovld __cnfn max(long2, long); ulong2 __ovld __cnfn max(ulong2, ulong); long3 __ovld __cnfn max(long3, long); ulong3 __ovld __cnfn max(ulong3, ulong); long4 __ovld __cnfn max(long4, long); ulong4 __ovld __cnfn max(ulong4, ulong); long8 __ovld __cnfn max(long8, long); ulong8 __ovld __cnfn max(ulong8, ulong); long16 __ovld __cnfn max(long16, long); ulong16 __ovld __cnfn max(ulong16, ulong); /** * Returns y if y < x, otherwise it returns x. 
*/ char __ovld __cnfn min(char, char); uchar __ovld __cnfn min(uchar, uchar); char2 __ovld __cnfn min(char2, char2); uchar2 __ovld __cnfn min(uchar2, uchar2); char3 __ovld __cnfn min(char3, char3); uchar3 __ovld __cnfn min(uchar3, uchar3); char4 __ovld __cnfn min(char4, char4); uchar4 __ovld __cnfn min(uchar4, uchar4); char8 __ovld __cnfn min(char8, char8); uchar8 __ovld __cnfn min(uchar8, uchar8); char16 __ovld __cnfn min(char16, char16); uchar16 __ovld __cnfn min(uchar16, uchar16); short __ovld __cnfn min(short, short); ushort __ovld __cnfn min(ushort, ushort); short2 __ovld __cnfn min(short2, short2); ushort2 __ovld __cnfn min(ushort2, ushort2); short3 __ovld __cnfn min(short3, short3); ushort3 __ovld __cnfn min(ushort3, ushort3); short4 __ovld __cnfn min(short4, short4); ushort4 __ovld __cnfn min(ushort4, ushort4); short8 __ovld __cnfn min(short8, short8); ushort8 __ovld __cnfn min(ushort8, ushort8); short16 __ovld __cnfn min(short16, short16); ushort16 __ovld __cnfn min(ushort16, ushort16); int __ovld __cnfn min(int, int); uint __ovld __cnfn min(uint, uint); int2 __ovld __cnfn min(int2, int2); uint2 __ovld __cnfn min(uint2, uint2); int3 __ovld __cnfn min(int3, int3); uint3 __ovld __cnfn min(uint3, uint3); int4 __ovld __cnfn min(int4, int4); uint4 __ovld __cnfn min(uint4, uint4); int8 __ovld __cnfn min(int8, int8); uint8 __ovld __cnfn min(uint8, uint8); int16 __ovld __cnfn min(int16, int16); uint16 __ovld __cnfn min(uint16, uint16); long __ovld __cnfn min(long, long); ulong __ovld __cnfn min(ulong, ulong); long2 __ovld __cnfn min(long2, long2); ulong2 __ovld __cnfn min(ulong2, ulong2); long3 __ovld __cnfn min(long3, long3); ulong3 __ovld __cnfn min(ulong3, ulong3); long4 __ovld __cnfn min(long4, long4); ulong4 __ovld __cnfn min(ulong4, ulong4); long8 __ovld __cnfn min(long8, long8); ulong8 __ovld __cnfn min(ulong8, ulong8); long16 __ovld __cnfn min(long16, long16); ulong16 __ovld __cnfn min(ulong16, ulong16); char2 __ovld __cnfn min(char2, char); uchar2 __ovld 
__cnfn min(uchar2, uchar); char3 __ovld __cnfn min(char3, char); uchar3 __ovld __cnfn min(uchar3, uchar); char4 __ovld __cnfn min(char4, char); uchar4 __ovld __cnfn min(uchar4, uchar); char8 __ovld __cnfn min(char8, char); uchar8 __ovld __cnfn min(uchar8, uchar); char16 __ovld __cnfn min(char16, char); uchar16 __ovld __cnfn min(uchar16, uchar); short2 __ovld __cnfn min(short2, short); ushort2 __ovld __cnfn min(ushort2, ushort); short3 __ovld __cnfn min(short3, short); ushort3 __ovld __cnfn min(ushort3, ushort); short4 __ovld __cnfn min(short4, short); ushort4 __ovld __cnfn min(ushort4, ushort); short8 __ovld __cnfn min(short8, short); ushort8 __ovld __cnfn min(ushort8, ushort); short16 __ovld __cnfn min(short16, short); ushort16 __ovld __cnfn min(ushort16, ushort); int2 __ovld __cnfn min(int2, int); uint2 __ovld __cnfn min(uint2, uint); int3 __ovld __cnfn min(int3, int); uint3 __ovld __cnfn min(uint3, uint); int4 __ovld __cnfn min(int4, int); uint4 __ovld __cnfn min(uint4, uint); int8 __ovld __cnfn min(int8, int); uint8 __ovld __cnfn min(uint8, uint); int16 __ovld __cnfn min(int16, int); uint16 __ovld __cnfn min(uint16, uint); long2 __ovld __cnfn min(long2, long); ulong2 __ovld __cnfn min(ulong2, ulong); long3 __ovld __cnfn min(long3, long); ulong3 __ovld __cnfn min(ulong3, ulong); long4 __ovld __cnfn min(long4, long); ulong4 __ovld __cnfn min(ulong4, ulong); long8 __ovld __cnfn min(long8, long); ulong8 __ovld __cnfn min(ulong8, ulong); long16 __ovld __cnfn min(long16, long); ulong16 __ovld __cnfn min(ulong16, ulong); /** * Computes x * y and returns the high half of the * product of x and y. 
*/ char __ovld __cnfn mul_hi(char, char); uchar __ovld __cnfn mul_hi(uchar, uchar); char2 __ovld __cnfn mul_hi(char2, char2); uchar2 __ovld __cnfn mul_hi(uchar2, uchar2); char3 __ovld __cnfn mul_hi(char3, char3); uchar3 __ovld __cnfn mul_hi(uchar3, uchar3); char4 __ovld __cnfn mul_hi(char4, char4); uchar4 __ovld __cnfn mul_hi(uchar4, uchar4); char8 __ovld __cnfn mul_hi(char8, char8); uchar8 __ovld __cnfn mul_hi(uchar8, uchar8); char16 __ovld __cnfn mul_hi(char16, char16); uchar16 __ovld __cnfn mul_hi(uchar16, uchar16); short __ovld __cnfn mul_hi(short, short); ushort __ovld __cnfn mul_hi(ushort, ushort); short2 __ovld __cnfn mul_hi(short2, short2); ushort2 __ovld __cnfn mul_hi(ushort2, ushort2); short3 __ovld __cnfn mul_hi(short3, short3); ushort3 __ovld __cnfn mul_hi(ushort3, ushort3); short4 __ovld __cnfn mul_hi(short4, short4); ushort4 __ovld __cnfn mul_hi(ushort4, ushort4); short8 __ovld __cnfn mul_hi(short8, short8); ushort8 __ovld __cnfn mul_hi(ushort8, ushort8); short16 __ovld __cnfn mul_hi(short16, short16); ushort16 __ovld __cnfn mul_hi(ushort16, ushort16); int __ovld __cnfn mul_hi(int, int); uint __ovld __cnfn mul_hi(uint, uint); int2 __ovld __cnfn mul_hi(int2, int2); uint2 __ovld __cnfn mul_hi(uint2, uint2); int3 __ovld __cnfn mul_hi(int3, int3); uint3 __ovld __cnfn mul_hi(uint3, uint3); int4 __ovld __cnfn mul_hi(int4, int4); uint4 __ovld __cnfn mul_hi(uint4, uint4); int8 __ovld __cnfn mul_hi(int8, int8); uint8 __ovld __cnfn mul_hi(uint8, uint8); int16 __ovld __cnfn mul_hi(int16, int16); uint16 __ovld __cnfn mul_hi(uint16, uint16); long __ovld __cnfn mul_hi(long, long); ulong __ovld __cnfn mul_hi(ulong, ulong); long2 __ovld __cnfn mul_hi(long2, long2); ulong2 __ovld __cnfn mul_hi(ulong2, ulong2); long3 __ovld __cnfn mul_hi(long3, long3); ulong3 __ovld __cnfn mul_hi(ulong3, ulong3); long4 __ovld __cnfn mul_hi(long4, long4); ulong4 __ovld __cnfn mul_hi(ulong4, ulong4); long8 __ovld __cnfn mul_hi(long8, long8); ulong8 __ovld __cnfn mul_hi(ulong8, ulong8); 
long16 __ovld __cnfn mul_hi(long16, long16); ulong16 __ovld __cnfn mul_hi(ulong16, ulong16); /** * For each element in v, the bits are shifted left by * the number of bits given by the corresponding * element in i (subject to usual shift modulo rules * described in section 6.3). Bits shifted off the left * side of the element are shifted back in from the * right. */ char __ovld __cnfn rotate(char, char); uchar __ovld __cnfn rotate(uchar, uchar); char2 __ovld __cnfn rotate(char2, char2); uchar2 __ovld __cnfn rotate(uchar2, uchar2); char3 __ovld __cnfn rotate(char3, char3); uchar3 __ovld __cnfn rotate(uchar3, uchar3); char4 __ovld __cnfn rotate(char4, char4); uchar4 __ovld __cnfn rotate(uchar4, uchar4); char8 __ovld __cnfn rotate(char8, char8); uchar8 __ovld __cnfn rotate(uchar8, uchar8); char16 __ovld __cnfn rotate(char16, char16); uchar16 __ovld __cnfn rotate(uchar16, uchar16); short __ovld __cnfn rotate(short, short); ushort __ovld __cnfn rotate(ushort, ushort); short2 __ovld __cnfn rotate(short2, short2); ushort2 __ovld __cnfn rotate(ushort2, ushort2); short3 __ovld __cnfn rotate(short3, short3); ushort3 __ovld __cnfn rotate(ushort3, ushort3); short4 __ovld __cnfn rotate(short4, short4); ushort4 __ovld __cnfn rotate(ushort4, ushort4); short8 __ovld __cnfn rotate(short8, short8); ushort8 __ovld __cnfn rotate(ushort8, ushort8); short16 __ovld __cnfn rotate(short16, short16); ushort16 __ovld __cnfn rotate(ushort16, ushort16); int __ovld __cnfn rotate(int, int); uint __ovld __cnfn rotate(uint, uint); int2 __ovld __cnfn rotate(int2, int2); uint2 __ovld __cnfn rotate(uint2, uint2); int3 __ovld __cnfn rotate(int3, int3); uint3 __ovld __cnfn rotate(uint3, uint3); int4 __ovld __cnfn rotate(int4, int4); uint4 __ovld __cnfn rotate(uint4, uint4); int8 __ovld __cnfn rotate(int8, int8); uint8 __ovld __cnfn rotate(uint8, uint8); int16 __ovld __cnfn rotate(int16, int16); uint16 __ovld __cnfn rotate(uint16, uint16); long __ovld __cnfn rotate(long, long); ulong __ovld __cnfn 
rotate(ulong, ulong); long2 __ovld __cnfn rotate(long2, long2); ulong2 __ovld __cnfn rotate(ulong2, ulong2); long3 __ovld __cnfn rotate(long3, long3); ulong3 __ovld __cnfn rotate(ulong3, ulong3); long4 __ovld __cnfn rotate(long4, long4); ulong4 __ovld __cnfn rotate(ulong4, ulong4); long8 __ovld __cnfn rotate(long8, long8); ulong8 __ovld __cnfn rotate(ulong8, ulong8); long16 __ovld __cnfn rotate(long16, long16); ulong16 __ovld __cnfn rotate(ulong16, ulong16); /** * Returns x - y and saturates the result. */ char __ovld __cnfn sub_sat(char, char); uchar __ovld __cnfn sub_sat(uchar, uchar); char2 __ovld __cnfn sub_sat(char2, char2); uchar2 __ovld __cnfn sub_sat(uchar2, uchar2); char3 __ovld __cnfn sub_sat(char3, char3); uchar3 __ovld __cnfn sub_sat(uchar3, uchar3); char4 __ovld __cnfn sub_sat(char4, char4); uchar4 __ovld __cnfn sub_sat(uchar4, uchar4); char8 __ovld __cnfn sub_sat(char8, char8); uchar8 __ovld __cnfn sub_sat(uchar8, uchar8); char16 __ovld __cnfn sub_sat(char16, char16); uchar16 __ovld __cnfn sub_sat(uchar16, uchar16); short __ovld __cnfn sub_sat(short, short); ushort __ovld __cnfn sub_sat(ushort, ushort); short2 __ovld __cnfn sub_sat(short2, short2); ushort2 __ovld __cnfn sub_sat(ushort2, ushort2); short3 __ovld __cnfn sub_sat(short3, short3); ushort3 __ovld __cnfn sub_sat(ushort3, ushort3); short4 __ovld __cnfn sub_sat(short4, short4); ushort4 __ovld __cnfn sub_sat(ushort4, ushort4); short8 __ovld __cnfn sub_sat(short8, short8); ushort8 __ovld __cnfn sub_sat(ushort8, ushort8); short16 __ovld __cnfn sub_sat(short16, short16); ushort16 __ovld __cnfn sub_sat(ushort16, ushort16); int __ovld __cnfn sub_sat(int, int); uint __ovld __cnfn sub_sat(uint, uint); int2 __ovld __cnfn sub_sat(int2, int2); uint2 __ovld __cnfn sub_sat(uint2, uint2); int3 __ovld __cnfn sub_sat(int3, int3); uint3 __ovld __cnfn sub_sat(uint3, uint3); int4 __ovld __cnfn sub_sat(int4, int4); uint4 __ovld __cnfn sub_sat(uint4, uint4); int8 __ovld __cnfn sub_sat(int8, int8); uint8 __ovld 
__cnfn sub_sat(uint8, uint8); int16 __ovld __cnfn sub_sat(int16, int16); uint16 __ovld __cnfn sub_sat(uint16, uint16); long __ovld __cnfn sub_sat(long, long); ulong __ovld __cnfn sub_sat(ulong, ulong); long2 __ovld __cnfn sub_sat(long2, long2); ulong2 __ovld __cnfn sub_sat(ulong2, ulong2); long3 __ovld __cnfn sub_sat(long3, long3); ulong3 __ovld __cnfn sub_sat(ulong3, ulong3); long4 __ovld __cnfn sub_sat(long4, long4); ulong4 __ovld __cnfn sub_sat(ulong4, ulong4); long8 __ovld __cnfn sub_sat(long8, long8); ulong8 __ovld __cnfn sub_sat(ulong8, ulong8); long16 __ovld __cnfn sub_sat(long16, long16); ulong16 __ovld __cnfn sub_sat(ulong16, ulong16); /** * result[i] = ((short)hi[i] << 8) | lo[i] * result[i] = ((ushort)hi[i] << 8) | lo[i] */ short __ovld __cnfn upsample(char, uchar); ushort __ovld __cnfn upsample(uchar, uchar); short2 __ovld __cnfn upsample(char2, uchar2); short3 __ovld __cnfn upsample(char3, uchar3); short4 __ovld __cnfn upsample(char4, uchar4); short8 __ovld __cnfn upsample(char8, uchar8); short16 __ovld __cnfn upsample(char16, uchar16); ushort2 __ovld __cnfn upsample(uchar2, uchar2); ushort3 __ovld __cnfn upsample(uchar3, uchar3); ushort4 __ovld __cnfn upsample(uchar4, uchar4); ushort8 __ovld __cnfn upsample(uchar8, uchar8); ushort16 __ovld __cnfn upsample(uchar16, uchar16); /** * result[i] = ((int)hi[i] << 16) | lo[i] * result[i] = ((uint)hi[i] << 16) | lo[i] */ int __ovld __cnfn upsample(short, ushort); uint __ovld __cnfn upsample(ushort, ushort); int2 __ovld __cnfn upsample(short2, ushort2); int3 __ovld __cnfn upsample(short3, ushort3); int4 __ovld __cnfn upsample(short4, ushort4); int8 __ovld __cnfn upsample(short8, ushort8); int16 __ovld __cnfn upsample(short16, ushort16); uint2 __ovld __cnfn upsample(ushort2, ushort2); uint3 __ovld __cnfn upsample(ushort3, ushort3); uint4 __ovld __cnfn upsample(ushort4, ushort4); uint8 __ovld __cnfn upsample(ushort8, ushort8); uint16 __ovld __cnfn upsample(ushort16, ushort16); /** * result[i] = ((long)hi[i] << 
32) | lo[i] * result[i] = ((ulong)hi[i] << 32) | lo[i] */ long __ovld __cnfn upsample(int, uint); ulong __ovld __cnfn upsample(uint, uint); long2 __ovld __cnfn upsample(int2, uint2); long3 __ovld __cnfn upsample(int3, uint3); long4 __ovld __cnfn upsample(int4, uint4); long8 __ovld __cnfn upsample(int8, uint8); long16 __ovld __cnfn upsample(int16, uint16); ulong2 __ovld __cnfn upsample(uint2, uint2); ulong3 __ovld __cnfn upsample(uint3, uint3); ulong4 __ovld __cnfn upsample(uint4, uint4); ulong8 __ovld __cnfn upsample(uint8, uint8); ulong16 __ovld __cnfn upsample(uint16, uint16); /* * popcount(x): returns the number of set bit in x */ #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) char __ovld __cnfn popcount(char); uchar __ovld __cnfn popcount(uchar); char2 __ovld __cnfn popcount(char2); uchar2 __ovld __cnfn popcount(uchar2); char3 __ovld __cnfn popcount(char3); uchar3 __ovld __cnfn popcount(uchar3); char4 __ovld __cnfn popcount(char4); uchar4 __ovld __cnfn popcount(uchar4); char8 __ovld __cnfn popcount(char8); uchar8 __ovld __cnfn popcount(uchar8); char16 __ovld __cnfn popcount(char16); uchar16 __ovld __cnfn popcount(uchar16); short __ovld __cnfn popcount(short); ushort __ovld __cnfn popcount(ushort); short2 __ovld __cnfn popcount(short2); ushort2 __ovld __cnfn popcount(ushort2); short3 __ovld __cnfn popcount(short3); ushort3 __ovld __cnfn popcount(ushort3); short4 __ovld __cnfn popcount(short4); ushort4 __ovld __cnfn popcount(ushort4); short8 __ovld __cnfn popcount(short8); ushort8 __ovld __cnfn popcount(ushort8); short16 __ovld __cnfn popcount(short16); ushort16 __ovld __cnfn popcount(ushort16); int __ovld __cnfn popcount(int); uint __ovld __cnfn popcount(uint); int2 __ovld __cnfn popcount(int2); uint2 __ovld __cnfn popcount(uint2); int3 __ovld __cnfn popcount(int3); uint3 __ovld __cnfn popcount(uint3); int4 __ovld __cnfn popcount(int4); uint4 __ovld __cnfn popcount(uint4); int8 __ovld __cnfn popcount(int8); uint8 __ovld __cnfn 
popcount(uint8); int16 __ovld __cnfn popcount(int16); uint16 __ovld __cnfn popcount(uint16); long __ovld __cnfn popcount(long); ulong __ovld __cnfn popcount(ulong); long2 __ovld __cnfn popcount(long2); ulong2 __ovld __cnfn popcount(ulong2); long3 __ovld __cnfn popcount(long3); ulong3 __ovld __cnfn popcount(ulong3); long4 __ovld __cnfn popcount(long4); ulong4 __ovld __cnfn popcount(ulong4); long8 __ovld __cnfn popcount(long8); ulong8 __ovld __cnfn popcount(ulong8); long16 __ovld __cnfn popcount(long16); ulong16 __ovld __cnfn popcount(ulong16); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) /** * Multiply two 24-bit integer values x and y and add * the 32-bit integer result to the 32-bit integer z. * Refer to definition of mul24 to see how the 24-bit * integer multiplication is performed. */ int __ovld __cnfn mad24(int, int, int); uint __ovld __cnfn mad24(uint, uint, uint); int2 __ovld __cnfn mad24(int2, int2, int2); uint2 __ovld __cnfn mad24(uint2, uint2, uint2); int3 __ovld __cnfn mad24(int3, int3, int3); uint3 __ovld __cnfn mad24(uint3, uint3, uint3); int4 __ovld __cnfn mad24(int4, int4, int4); uint4 __ovld __cnfn mad24(uint4, uint4, uint4); int8 __ovld __cnfn mad24(int8, int8, int8); uint8 __ovld __cnfn mad24(uint8, uint8, uint8); int16 __ovld __cnfn mad24(int16, int16, int16); uint16 __ovld __cnfn mad24(uint16, uint16, uint16); /** * Multiply two 24-bit integer values x and y. x and y * are 32-bit integers but only the low 24-bits are used * to perform the multiplication. mul24 should only * be used when values in x and y are in the range [- * 2^23, 2^23-1] if x and y are signed integers and in the * range [0, 2^24-1] if x and y are unsigned integers. If * x and y are not in this range, the multiplication * result is implementation-defined. 
*/ int __ovld __cnfn mul24(int, int); uint __ovld __cnfn mul24(uint, uint); int2 __ovld __cnfn mul24(int2, int2); uint2 __ovld __cnfn mul24(uint2, uint2); int3 __ovld __cnfn mul24(int3, int3); uint3 __ovld __cnfn mul24(uint3, uint3); int4 __ovld __cnfn mul24(int4, int4); uint4 __ovld __cnfn mul24(uint4, uint4); int8 __ovld __cnfn mul24(int8, int8); uint8 __ovld __cnfn mul24(uint8, uint8); int16 __ovld __cnfn mul24(int16, int16); uint16 __ovld __cnfn mul24(uint16, uint16); // OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions /** * Returns fmin(fmax(x, minval), maxval). * Results are undefined if minval > maxval. */ float __ovld __cnfn clamp(float, float, float); float2 __ovld __cnfn clamp(float2, float2, float2); float3 __ovld __cnfn clamp(float3, float3, float3); float4 __ovld __cnfn clamp(float4, float4, float4); float8 __ovld __cnfn clamp(float8, float8, float8); float16 __ovld __cnfn clamp(float16, float16, float16); float2 __ovld __cnfn clamp(float2, float, float); float3 __ovld __cnfn clamp(float3, float, float); float4 __ovld __cnfn clamp(float4, float, float); float8 __ovld __cnfn clamp(float8, float, float); float16 __ovld __cnfn clamp(float16, float, float); #ifdef cl_khr_fp64 double __ovld __cnfn clamp(double, double, double); double2 __ovld __cnfn clamp(double2, double2, double2); double3 __ovld __cnfn clamp(double3, double3, double3); double4 __ovld __cnfn clamp(double4, double4, double4); double8 __ovld __cnfn clamp(double8, double8, double8); double16 __ovld __cnfn clamp(double16, double16, double16); double2 __ovld __cnfn clamp(double2, double, double); double3 __ovld __cnfn clamp(double3, double, double); double4 __ovld __cnfn clamp(double4, double, double); double8 __ovld __cnfn clamp(double8, double, double); double16 __ovld __cnfn clamp(double16, double, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn clamp(half, half, half); half2 __ovld __cnfn clamp(half2, half2, half2); half3 __ovld __cnfn clamp(half3, 
half3, half3); half4 __ovld __cnfn clamp(half4, half4, half4); half8 __ovld __cnfn clamp(half8, half8, half8); half16 __ovld __cnfn clamp(half16, half16, half16); half2 __ovld __cnfn clamp(half2, half, half); half3 __ovld __cnfn clamp(half3, half, half); half4 __ovld __cnfn clamp(half4, half, half); half8 __ovld __cnfn clamp(half8, half, half); half16 __ovld __cnfn clamp(half16, half, half); #endif //cl_khr_fp16 /** * Converts radians to degrees, i.e. (180 / PI) * * radians. */ float __ovld __cnfn degrees(float); float2 __ovld __cnfn degrees(float2); float3 __ovld __cnfn degrees(float3); float4 __ovld __cnfn degrees(float4); float8 __ovld __cnfn degrees(float8); float16 __ovld __cnfn degrees(float16); #ifdef cl_khr_fp64 double __ovld __cnfn degrees(double); double2 __ovld __cnfn degrees(double2); double3 __ovld __cnfn degrees(double3); double4 __ovld __cnfn degrees(double4); double8 __ovld __cnfn degrees(double8); double16 __ovld __cnfn degrees(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn degrees(half); half2 __ovld __cnfn degrees(half2); half3 __ovld __cnfn degrees(half3); half4 __ovld __cnfn degrees(half4); half8 __ovld __cnfn degrees(half8); half16 __ovld __cnfn degrees(half16); #endif //cl_khr_fp16 /** * Returns y if x < y, otherwise it returns x. If x and y * are infinite or NaN, the return values are undefined. 
*/ float __ovld __cnfn max(float, float); float2 __ovld __cnfn max(float2, float2); float3 __ovld __cnfn max(float3, float3); float4 __ovld __cnfn max(float4, float4); float8 __ovld __cnfn max(float8, float8); float16 __ovld __cnfn max(float16, float16); float2 __ovld __cnfn max(float2, float); float3 __ovld __cnfn max(float3, float); float4 __ovld __cnfn max(float4, float); float8 __ovld __cnfn max(float8, float); float16 __ovld __cnfn max(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn max(double, double); double2 __ovld __cnfn max(double2, double2); double3 __ovld __cnfn max(double3, double3); double4 __ovld __cnfn max(double4, double4); double8 __ovld __cnfn max(double8, double8); double16 __ovld __cnfn max(double16, double16); double2 __ovld __cnfn max(double2, double); double3 __ovld __cnfn max(double3, double); double4 __ovld __cnfn max(double4, double); double8 __ovld __cnfn max(double8, double); double16 __ovld __cnfn max(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn max(half, half); half2 __ovld __cnfn max(half2, half2); half3 __ovld __cnfn max(half3, half3); half4 __ovld __cnfn max(half4, half4); half8 __ovld __cnfn max(half8, half8); half16 __ovld __cnfn max(half16, half16); half2 __ovld __cnfn max(half2, half); half3 __ovld __cnfn max(half3, half); half4 __ovld __cnfn max(half4, half); half8 __ovld __cnfn max(half8, half); half16 __ovld __cnfn max(half16, half); #endif //cl_khr_fp16 /** * Returns y if y < x, otherwise it returns x. If x and y * are infinite or NaN, the return values are undefined. 
*/ float __ovld __cnfn min(float, float); float2 __ovld __cnfn min(float2, float2); float3 __ovld __cnfn min(float3, float3); float4 __ovld __cnfn min(float4, float4); float8 __ovld __cnfn min(float8, float8); float16 __ovld __cnfn min(float16, float16); float2 __ovld __cnfn min(float2, float); float3 __ovld __cnfn min(float3, float); float4 __ovld __cnfn min(float4, float); float8 __ovld __cnfn min(float8, float); float16 __ovld __cnfn min(float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn min(double, double); double2 __ovld __cnfn min(double2, double2); double3 __ovld __cnfn min(double3, double3); double4 __ovld __cnfn min(double4, double4); double8 __ovld __cnfn min(double8, double8); double16 __ovld __cnfn min(double16, double16); double2 __ovld __cnfn min(double2, double); double3 __ovld __cnfn min(double3, double); double4 __ovld __cnfn min(double4, double); double8 __ovld __cnfn min(double8, double); double16 __ovld __cnfn min(double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn min(half, half); half2 __ovld __cnfn min(half2, half2); half3 __ovld __cnfn min(half3, half3); half4 __ovld __cnfn min(half4, half4); half8 __ovld __cnfn min(half8, half8); half16 __ovld __cnfn min(half16, half16); half2 __ovld __cnfn min(half2, half); half3 __ovld __cnfn min(half3, half); half4 __ovld __cnfn min(half4, half); half8 __ovld __cnfn min(half8, half); half16 __ovld __cnfn min(half16, half); #endif //cl_khr_fp16 /** * Returns the linear blend of x & y implemented as: * x + (y - x) * a * a must be a value in the range 0.0 ... 1.0. If a is not * in the range 0.0 ... 1.0, the return values are * undefined. 
*/ float __ovld __cnfn mix(float, float, float); float2 __ovld __cnfn mix(float2, float2, float2); float3 __ovld __cnfn mix(float3, float3, float3); float4 __ovld __cnfn mix(float4, float4, float4); float8 __ovld __cnfn mix(float8, float8, float8); float16 __ovld __cnfn mix(float16, float16, float16); float2 __ovld __cnfn mix(float2, float2, float); float3 __ovld __cnfn mix(float3, float3, float); float4 __ovld __cnfn mix(float4, float4, float); float8 __ovld __cnfn mix(float8, float8, float); float16 __ovld __cnfn mix(float16, float16, float); #ifdef cl_khr_fp64 double __ovld __cnfn mix(double, double, double); double2 __ovld __cnfn mix(double2, double2, double2); double3 __ovld __cnfn mix(double3, double3, double3); double4 __ovld __cnfn mix(double4, double4, double4); double8 __ovld __cnfn mix(double8, double8, double8); double16 __ovld __cnfn mix(double16, double16, double16); double2 __ovld __cnfn mix(double2, double2, double); double3 __ovld __cnfn mix(double3, double3, double); double4 __ovld __cnfn mix(double4, double4, double); double8 __ovld __cnfn mix(double8, double8, double); double16 __ovld __cnfn mix(double16, double16, double); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn mix(half, half, half); half2 __ovld __cnfn mix(half2, half2, half2); half3 __ovld __cnfn mix(half3, half3, half3); half4 __ovld __cnfn mix(half4, half4, half4); half8 __ovld __cnfn mix(half8, half8, half8); half16 __ovld __cnfn mix(half16, half16, half16); half2 __ovld __cnfn mix(half2, half2, half); half3 __ovld __cnfn mix(half3, half3, half); half4 __ovld __cnfn mix(half4, half4, half); half8 __ovld __cnfn mix(half8, half8, half); half16 __ovld __cnfn mix(half16, half16, half); #endif //cl_khr_fp16 /** * Converts degrees to radians, i.e. (PI / 180) * * degrees. 
*/ float __ovld __cnfn radians(float); float2 __ovld __cnfn radians(float2); float3 __ovld __cnfn radians(float3); float4 __ovld __cnfn radians(float4); float8 __ovld __cnfn radians(float8); float16 __ovld __cnfn radians(float16); #ifdef cl_khr_fp64 double __ovld __cnfn radians(double); double2 __ovld __cnfn radians(double2); double3 __ovld __cnfn radians(double3); double4 __ovld __cnfn radians(double4); double8 __ovld __cnfn radians(double8); double16 __ovld __cnfn radians(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn radians(half); half2 __ovld __cnfn radians(half2); half3 __ovld __cnfn radians(half3); half4 __ovld __cnfn radians(half4); half8 __ovld __cnfn radians(half8); half16 __ovld __cnfn radians(half16); #endif //cl_khr_fp16 /** * Returns 0.0 if x < edge, otherwise it returns 1.0. */ float __ovld __cnfn step(float, float); float2 __ovld __cnfn step(float2, float2); float3 __ovld __cnfn step(float3, float3); float4 __ovld __cnfn step(float4, float4); float8 __ovld __cnfn step(float8, float8); float16 __ovld __cnfn step(float16, float16); float2 __ovld __cnfn step(float, float2); float3 __ovld __cnfn step(float, float3); float4 __ovld __cnfn step(float, float4); float8 __ovld __cnfn step(float, float8); float16 __ovld __cnfn step(float, float16); #ifdef cl_khr_fp64 double __ovld __cnfn step(double, double); double2 __ovld __cnfn step(double2, double2); double3 __ovld __cnfn step(double3, double3); double4 __ovld __cnfn step(double4, double4); double8 __ovld __cnfn step(double8, double8); double16 __ovld __cnfn step(double16, double16); double2 __ovld __cnfn step(double, double2); double3 __ovld __cnfn step(double, double3); double4 __ovld __cnfn step(double, double4); double8 __ovld __cnfn step(double, double8); double16 __ovld __cnfn step(double, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn step(half, half); half2 __ovld __cnfn step(half2, half2); half3 __ovld __cnfn step(half3, half3); half4 __ovld __cnfn 
step(half4, half4); half8 __ovld __cnfn step(half8, half8); half16 __ovld __cnfn step(half16, half16); half2 __ovld __cnfn step(half, half2); half3 __ovld __cnfn step(half, half3); half4 __ovld __cnfn step(half, half4); half8 __ovld __cnfn step(half, half8); half16 __ovld __cnfn step(half, half16); #endif //cl_khr_fp16 /** * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and * performs smooth Hermite interpolation between 0 * and 1when edge0 < x < edge1. This is useful in * cases where you would want a threshold function * with a smooth transition. * This is equivalent to: * gentype t; * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1); * return t * t * (3 - 2 * t); * Results are undefined if edge0 >= edge1 or if x, * edge0 or edge1 is a NaN. */ float __ovld __cnfn smoothstep(float, float, float); float2 __ovld __cnfn smoothstep(float2, float2, float2); float3 __ovld __cnfn smoothstep(float3, float3, float3); float4 __ovld __cnfn smoothstep(float4, float4, float4); float8 __ovld __cnfn smoothstep(float8, float8, float8); float16 __ovld __cnfn smoothstep(float16, float16, float16); float2 __ovld __cnfn smoothstep(float, float, float2); float3 __ovld __cnfn smoothstep(float, float, float3); float4 __ovld __cnfn smoothstep(float, float, float4); float8 __ovld __cnfn smoothstep(float, float, float8); float16 __ovld __cnfn smoothstep(float, float, float16); #ifdef cl_khr_fp64 double __ovld __cnfn smoothstep(double, double, double); double2 __ovld __cnfn smoothstep(double2, double2, double2); double3 __ovld __cnfn smoothstep(double3, double3, double3); double4 __ovld __cnfn smoothstep(double4, double4, double4); double8 __ovld __cnfn smoothstep(double8, double8, double8); double16 __ovld __cnfn smoothstep(double16, double16, double16); double2 __ovld __cnfn smoothstep(double, double, double2); double3 __ovld __cnfn smoothstep(double, double, double3); double4 __ovld __cnfn smoothstep(double, double, double4); double8 __ovld __cnfn smoothstep(double, double, double8); 
double16 __ovld __cnfn smoothstep(double, double, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn smoothstep(half, half, half); half2 __ovld __cnfn smoothstep(half2, half2, half2); half3 __ovld __cnfn smoothstep(half3, half3, half3); half4 __ovld __cnfn smoothstep(half4, half4, half4); half8 __ovld __cnfn smoothstep(half8, half8, half8); half16 __ovld __cnfn smoothstep(half16, half16, half16); half2 __ovld __cnfn smoothstep(half, half, half2); half3 __ovld __cnfn smoothstep(half, half, half3); half4 __ovld __cnfn smoothstep(half, half, half4); half8 __ovld __cnfn smoothstep(half, half, half8); half16 __ovld __cnfn smoothstep(half, half, half16); #endif //cl_khr_fp16 /** * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN. */ float __ovld __cnfn sign(float); float2 __ovld __cnfn sign(float2); float3 __ovld __cnfn sign(float3); float4 __ovld __cnfn sign(float4); float8 __ovld __cnfn sign(float8); float16 __ovld __cnfn sign(float16); #ifdef cl_khr_fp64 double __ovld __cnfn sign(double); double2 __ovld __cnfn sign(double2); double3 __ovld __cnfn sign(double3); double4 __ovld __cnfn sign(double4); double8 __ovld __cnfn sign(double8); double16 __ovld __cnfn sign(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn sign(half); half2 __ovld __cnfn sign(half2); half3 __ovld __cnfn sign(half3); half4 __ovld __cnfn sign(half4); half8 __ovld __cnfn sign(half8); half16 __ovld __cnfn sign(half16); #endif //cl_khr_fp16 // OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions /** * Returns the cross product of p0.xyz and p1.xyz. The * w component of float4 result returned will be 0.0. 
*/ float4 __ovld __cnfn cross(float4, float4); float3 __ovld __cnfn cross(float3, float3); #ifdef cl_khr_fp64 double4 __ovld __cnfn cross(double4, double4); double3 __ovld __cnfn cross(double3, double3); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half4 __ovld __cnfn cross(half4, half4); half3 __ovld __cnfn cross(half3, half3); #endif //cl_khr_fp16 /** * Compute dot product. */ float __ovld __cnfn dot(float, float); float __ovld __cnfn dot(float2, float2); float __ovld __cnfn dot(float3, float3); float __ovld __cnfn dot(float4, float4); #ifdef cl_khr_fp64 double __ovld __cnfn dot(double, double); double __ovld __cnfn dot(double2, double2); double __ovld __cnfn dot(double3, double3); double __ovld __cnfn dot(double4, double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn dot(half, half); half __ovld __cnfn dot(half2, half2); half __ovld __cnfn dot(half3, half3); half __ovld __cnfn dot(half4, half4); #endif //cl_khr_fp16 /** * Returns the distance between p0 and p1. This is * calculated as length(p0 - p1). */ float __ovld __cnfn distance(float, float); float __ovld __cnfn distance(float2, float2); float __ovld __cnfn distance(float3, float3); float __ovld __cnfn distance(float4, float4); #ifdef cl_khr_fp64 double __ovld __cnfn distance(double, double); double __ovld __cnfn distance(double2, double2); double __ovld __cnfn distance(double3, double3); double __ovld __cnfn distance(double4, double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn distance(half, half); half __ovld __cnfn distance(half2, half2); half __ovld __cnfn distance(half3, half3); half __ovld __cnfn distance(half4, half4); #endif //cl_khr_fp16 /** * Return the length of vector p, i.e., * sqrt(p.x2 + p.y 2 + ...) 
*/ float __ovld __cnfn length(float); float __ovld __cnfn length(float2); float __ovld __cnfn length(float3); float __ovld __cnfn length(float4); #ifdef cl_khr_fp64 double __ovld __cnfn length(double); double __ovld __cnfn length(double2); double __ovld __cnfn length(double3); double __ovld __cnfn length(double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn length(half); half __ovld __cnfn length(half2); half __ovld __cnfn length(half3); half __ovld __cnfn length(half4); #endif //cl_khr_fp16 /** * Returns a vector in the same direction as p but with a * length of 1. */ float __ovld __cnfn normalize(float); float2 __ovld __cnfn normalize(float2); float3 __ovld __cnfn normalize(float3); float4 __ovld __cnfn normalize(float4); #ifdef cl_khr_fp64 double __ovld __cnfn normalize(double); double2 __ovld __cnfn normalize(double2); double3 __ovld __cnfn normalize(double3); double4 __ovld __cnfn normalize(double4); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn normalize(half); half2 __ovld __cnfn normalize(half2); half3 __ovld __cnfn normalize(half3); half4 __ovld __cnfn normalize(half4); #endif //cl_khr_fp16 /** * Returns fast_length(p0 - p1). */ float __ovld __cnfn fast_distance(float, float); float __ovld __cnfn fast_distance(float2, float2); float __ovld __cnfn fast_distance(float3, float3); float __ovld __cnfn fast_distance(float4, float4); /** * Returns the length of vector p computed as: * half_sqrt(p.x2 + p.y2 + ...) */ float __ovld __cnfn fast_length(float); float __ovld __cnfn fast_length(float2); float __ovld __cnfn fast_length(float3); float __ovld __cnfn fast_length(float4); /** * Returns a vector in the same direction as p but with a * length of 1. fast_normalize is computed as: * p * half_rsqrt (p.x^2 + p.y^2 + ... 
) * The result shall be within 8192 ulps error from the * infinitely precise result of * if (all(p == 0.0f)) * result = p; * else * result = p / sqrt (p.x^2 + p.y^2 + ...); * with the following exceptions: * 1) If the sum of squares is greater than FLT_MAX * then the value of the floating-point values in the * result vector are undefined. * 2) If the sum of squares is less than FLT_MIN then * the implementation may return back p. * 3) If the device is in "denorms are flushed to zero" * mode, individual operand elements with magnitude * less than sqrt(FLT_MIN) may be flushed to zero * before proceeding with the calculation. */ float __ovld __cnfn fast_normalize(float); float2 __ovld __cnfn fast_normalize(float2); float3 __ovld __cnfn fast_normalize(float3); float4 __ovld __cnfn fast_normalize(float4); // OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions /** * intn isequal (floatn x, floatn y) * Returns the component-wise compare of x == y. */ int __ovld __cnfn isequal(float, float); int2 __ovld __cnfn isequal(float2, float2); int3 __ovld __cnfn isequal(float3, float3); int4 __ovld __cnfn isequal(float4, float4); int8 __ovld __cnfn isequal(float8, float8); int16 __ovld __cnfn isequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isequal(double, double); long2 __ovld __cnfn isequal(double2, double2); long3 __ovld __cnfn isequal(double3, double3); long4 __ovld __cnfn isequal(double4, double4); long8 __ovld __cnfn isequal(double8, double8); long16 __ovld __cnfn isequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isequal(half, half); short2 __ovld __cnfn isequal(half2, half2); short3 __ovld __cnfn isequal(half3, half3); short4 __ovld __cnfn isequal(half4, half4); short8 __ovld __cnfn isequal(half8, half8); short16 __ovld __cnfn isequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x != y. 
*/ int __ovld __cnfn isnotequal(float, float); int2 __ovld __cnfn isnotequal(float2, float2); int3 __ovld __cnfn isnotequal(float3, float3); int4 __ovld __cnfn isnotequal(float4, float4); int8 __ovld __cnfn isnotequal(float8, float8); int16 __ovld __cnfn isnotequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isnotequal(double, double); long2 __ovld __cnfn isnotequal(double2, double2); long3 __ovld __cnfn isnotequal(double3, double3); long4 __ovld __cnfn isnotequal(double4, double4); long8 __ovld __cnfn isnotequal(double8, double8); long16 __ovld __cnfn isnotequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isnotequal(half, half); short2 __ovld __cnfn isnotequal(half2, half2); short3 __ovld __cnfn isnotequal(half3, half3); short4 __ovld __cnfn isnotequal(half4, half4); short8 __ovld __cnfn isnotequal(half8, half8); short16 __ovld __cnfn isnotequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x > y. */ int __ovld __cnfn isgreater(float, float); int2 __ovld __cnfn isgreater(float2, float2); int3 __ovld __cnfn isgreater(float3, float3); int4 __ovld __cnfn isgreater(float4, float4); int8 __ovld __cnfn isgreater(float8, float8); int16 __ovld __cnfn isgreater(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isgreater(double, double); long2 __ovld __cnfn isgreater(double2, double2); long3 __ovld __cnfn isgreater(double3, double3); long4 __ovld __cnfn isgreater(double4, double4); long8 __ovld __cnfn isgreater(double8, double8); long16 __ovld __cnfn isgreater(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isgreater(half, half); short2 __ovld __cnfn isgreater(half2, half2); short3 __ovld __cnfn isgreater(half3, half3); short4 __ovld __cnfn isgreater(half4, half4); short8 __ovld __cnfn isgreater(half8, half8); short16 __ovld __cnfn isgreater(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x >= y. 
*/ int __ovld __cnfn isgreaterequal(float, float); int2 __ovld __cnfn isgreaterequal(float2, float2); int3 __ovld __cnfn isgreaterequal(float3, float3); int4 __ovld __cnfn isgreaterequal(float4, float4); int8 __ovld __cnfn isgreaterequal(float8, float8); int16 __ovld __cnfn isgreaterequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isgreaterequal(double, double); long2 __ovld __cnfn isgreaterequal(double2, double2); long3 __ovld __cnfn isgreaterequal(double3, double3); long4 __ovld __cnfn isgreaterequal(double4, double4); long8 __ovld __cnfn isgreaterequal(double8, double8); long16 __ovld __cnfn isgreaterequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isgreaterequal(half, half); short2 __ovld __cnfn isgreaterequal(half2, half2); short3 __ovld __cnfn isgreaterequal(half3, half3); short4 __ovld __cnfn isgreaterequal(half4, half4); short8 __ovld __cnfn isgreaterequal(half8, half8); short16 __ovld __cnfn isgreaterequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x < y. */ int __ovld __cnfn isless(float, float); int2 __ovld __cnfn isless(float2, float2); int3 __ovld __cnfn isless(float3, float3); int4 __ovld __cnfn isless(float4, float4); int8 __ovld __cnfn isless(float8, float8); int16 __ovld __cnfn isless(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isless(double, double); long2 __ovld __cnfn isless(double2, double2); long3 __ovld __cnfn isless(double3, double3); long4 __ovld __cnfn isless(double4, double4); long8 __ovld __cnfn isless(double8, double8); long16 __ovld __cnfn isless(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isless(half, half); short2 __ovld __cnfn isless(half2, half2); short3 __ovld __cnfn isless(half3, half3); short4 __ovld __cnfn isless(half4, half4); short8 __ovld __cnfn isless(half8, half8); short16 __ovld __cnfn isless(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of x <= y. 
*/ int __ovld __cnfn islessequal(float, float); int2 __ovld __cnfn islessequal(float2, float2); int3 __ovld __cnfn islessequal(float3, float3); int4 __ovld __cnfn islessequal(float4, float4); int8 __ovld __cnfn islessequal(float8, float8); int16 __ovld __cnfn islessequal(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn islessequal(double, double); long2 __ovld __cnfn islessequal(double2, double2); long3 __ovld __cnfn islessequal(double3, double3); long4 __ovld __cnfn islessequal(double4, double4); long8 __ovld __cnfn islessequal(double8, double8); long16 __ovld __cnfn islessequal(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn islessequal(half, half); short2 __ovld __cnfn islessequal(half2, half2); short3 __ovld __cnfn islessequal(half3, half3); short4 __ovld __cnfn islessequal(half4, half4); short8 __ovld __cnfn islessequal(half8, half8); short16 __ovld __cnfn islessequal(half16, half16); #endif //cl_khr_fp16 /** * Returns the component-wise compare of * (x < y) || (x > y) . 
*/ int __ovld __cnfn islessgreater(float, float); int2 __ovld __cnfn islessgreater(float2, float2); int3 __ovld __cnfn islessgreater(float3, float3); int4 __ovld __cnfn islessgreater(float4, float4); int8 __ovld __cnfn islessgreater(float8, float8); int16 __ovld __cnfn islessgreater(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn islessgreater(double, double); long2 __ovld __cnfn islessgreater(double2, double2); long3 __ovld __cnfn islessgreater(double3, double3); long4 __ovld __cnfn islessgreater(double4, double4); long8 __ovld __cnfn islessgreater(double8, double8); long16 __ovld __cnfn islessgreater(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn islessgreater(half, half); short2 __ovld __cnfn islessgreater(half2, half2); short3 __ovld __cnfn islessgreater(half3, half3); short4 __ovld __cnfn islessgreater(half4, half4); short8 __ovld __cnfn islessgreater(half8, half8); short16 __ovld __cnfn islessgreater(half16, half16); #endif //cl_khr_fp16 /** * Test for finite value. */ int __ovld __cnfn isfinite(float); int2 __ovld __cnfn isfinite(float2); int3 __ovld __cnfn isfinite(float3); int4 __ovld __cnfn isfinite(float4); int8 __ovld __cnfn isfinite(float8); int16 __ovld __cnfn isfinite(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isfinite(double); long2 __ovld __cnfn isfinite(double2); long3 __ovld __cnfn isfinite(double3); long4 __ovld __cnfn isfinite(double4); long8 __ovld __cnfn isfinite(double8); long16 __ovld __cnfn isfinite(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isfinite(half); short2 __ovld __cnfn isfinite(half2); short3 __ovld __cnfn isfinite(half3); short4 __ovld __cnfn isfinite(half4); short8 __ovld __cnfn isfinite(half8); short16 __ovld __cnfn isfinite(half16); #endif //cl_khr_fp16 /** * Test for infinity value (+ve or -ve) . 
*/ int __ovld __cnfn isinf(float); int2 __ovld __cnfn isinf(float2); int3 __ovld __cnfn isinf(float3); int4 __ovld __cnfn isinf(float4); int8 __ovld __cnfn isinf(float8); int16 __ovld __cnfn isinf(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isinf(double); long2 __ovld __cnfn isinf(double2); long3 __ovld __cnfn isinf(double3); long4 __ovld __cnfn isinf(double4); long8 __ovld __cnfn isinf(double8); long16 __ovld __cnfn isinf(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isinf(half); short2 __ovld __cnfn isinf(half2); short3 __ovld __cnfn isinf(half3); short4 __ovld __cnfn isinf(half4); short8 __ovld __cnfn isinf(half8); short16 __ovld __cnfn isinf(half16); #endif //cl_khr_fp16 /** * Test for a NaN. */ int __ovld __cnfn isnan(float); int2 __ovld __cnfn isnan(float2); int3 __ovld __cnfn isnan(float3); int4 __ovld __cnfn isnan(float4); int8 __ovld __cnfn isnan(float8); int16 __ovld __cnfn isnan(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isnan(double); long2 __ovld __cnfn isnan(double2); long3 __ovld __cnfn isnan(double3); long4 __ovld __cnfn isnan(double4); long8 __ovld __cnfn isnan(double8); long16 __ovld __cnfn isnan(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isnan(half); short2 __ovld __cnfn isnan(half2); short3 __ovld __cnfn isnan(half3); short4 __ovld __cnfn isnan(half4); short8 __ovld __cnfn isnan(half8); short16 __ovld __cnfn isnan(half16); #endif //cl_khr_fp16 /** * Test for a normal value. 
*/ int __ovld __cnfn isnormal(float); int2 __ovld __cnfn isnormal(float2); int3 __ovld __cnfn isnormal(float3); int4 __ovld __cnfn isnormal(float4); int8 __ovld __cnfn isnormal(float8); int16 __ovld __cnfn isnormal(float16); #ifdef cl_khr_fp64 int __ovld __cnfn isnormal(double); long2 __ovld __cnfn isnormal(double2); long3 __ovld __cnfn isnormal(double3); long4 __ovld __cnfn isnormal(double4); long8 __ovld __cnfn isnormal(double8); long16 __ovld __cnfn isnormal(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isnormal(half); short2 __ovld __cnfn isnormal(half2); short3 __ovld __cnfn isnormal(half3); short4 __ovld __cnfn isnormal(half4); short8 __ovld __cnfn isnormal(half8); short16 __ovld __cnfn isnormal(half16); #endif //cl_khr_fp16 /** * Test if arguments are ordered. isordered() takes * arguments x and y, and returns the result * isequal(x, x) && isequal(y, y). */ int __ovld __cnfn isordered(float, float); int2 __ovld __cnfn isordered(float2, float2); int3 __ovld __cnfn isordered(float3, float3); int4 __ovld __cnfn isordered(float4, float4); int8 __ovld __cnfn isordered(float8, float8); int16 __ovld __cnfn isordered(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isordered(double, double); long2 __ovld __cnfn isordered(double2, double2); long3 __ovld __cnfn isordered(double3, double3); long4 __ovld __cnfn isordered(double4, double4); long8 __ovld __cnfn isordered(double8, double8); long16 __ovld __cnfn isordered(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isordered(half, half); short2 __ovld __cnfn isordered(half2, half2); short3 __ovld __cnfn isordered(half3, half3); short4 __ovld __cnfn isordered(half4, half4); short8 __ovld __cnfn isordered(half8, half8); short16 __ovld __cnfn isordered(half16, half16); #endif //cl_khr_fp16 /** * Test if arguments are unordered. isunordered() * takes arguments x and y, returning non-zero if x or y * is NaN, and zero otherwise. 
*/ int __ovld __cnfn isunordered(float, float); int2 __ovld __cnfn isunordered(float2, float2); int3 __ovld __cnfn isunordered(float3, float3); int4 __ovld __cnfn isunordered(float4, float4); int8 __ovld __cnfn isunordered(float8, float8); int16 __ovld __cnfn isunordered(float16, float16); #ifdef cl_khr_fp64 int __ovld __cnfn isunordered(double, double); long2 __ovld __cnfn isunordered(double2, double2); long3 __ovld __cnfn isunordered(double3, double3); long4 __ovld __cnfn isunordered(double4, double4); long8 __ovld __cnfn isunordered(double8, double8); long16 __ovld __cnfn isunordered(double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn isunordered(half, half); short2 __ovld __cnfn isunordered(half2, half2); short3 __ovld __cnfn isunordered(half3, half3); short4 __ovld __cnfn isunordered(half4, half4); short8 __ovld __cnfn isunordered(half8, half8); short16 __ovld __cnfn isunordered(half16, half16); #endif //cl_khr_fp16 /** * Test for sign bit. The scalar version of the function * returns a 1 if the sign bit in the float is set else returns * 0. The vector version of the function returns the * following for each component in floatn: a -1 if the * sign bit in the float is set else returns 0. 
*/ int __ovld __cnfn signbit(float); int2 __ovld __cnfn signbit(float2); int3 __ovld __cnfn signbit(float3); int4 __ovld __cnfn signbit(float4); int8 __ovld __cnfn signbit(float8); int16 __ovld __cnfn signbit(float16); #ifdef cl_khr_fp64 int __ovld __cnfn signbit(double); long2 __ovld __cnfn signbit(double2); long3 __ovld __cnfn signbit(double3); long4 __ovld __cnfn signbit(double4); long8 __ovld __cnfn signbit(double8); long16 __ovld __cnfn signbit(double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 int __ovld __cnfn signbit(half); short2 __ovld __cnfn signbit(half2); short3 __ovld __cnfn signbit(half3); short4 __ovld __cnfn signbit(half4); short8 __ovld __cnfn signbit(half8); short16 __ovld __cnfn signbit(half16); #endif //cl_khr_fp16 /** * Returns 1 if the most significant bit in any component * of x is set; otherwise returns 0. */ int __ovld __cnfn any(char); int __ovld __cnfn any(char2); int __ovld __cnfn any(char3); int __ovld __cnfn any(char4); int __ovld __cnfn any(char8); int __ovld __cnfn any(char16); int __ovld __cnfn any(short); int __ovld __cnfn any(short2); int __ovld __cnfn any(short3); int __ovld __cnfn any(short4); int __ovld __cnfn any(short8); int __ovld __cnfn any(short16); int __ovld __cnfn any(int); int __ovld __cnfn any(int2); int __ovld __cnfn any(int3); int __ovld __cnfn any(int4); int __ovld __cnfn any(int8); int __ovld __cnfn any(int16); int __ovld __cnfn any(long); int __ovld __cnfn any(long2); int __ovld __cnfn any(long3); int __ovld __cnfn any(long4); int __ovld __cnfn any(long8); int __ovld __cnfn any(long16); /** * Returns 1 if the most significant bit in all components * of x is set; otherwise returns 0. 
*/ int __ovld __cnfn all(char); int __ovld __cnfn all(char2); int __ovld __cnfn all(char3); int __ovld __cnfn all(char4); int __ovld __cnfn all(char8); int __ovld __cnfn all(char16); int __ovld __cnfn all(short); int __ovld __cnfn all(short2); int __ovld __cnfn all(short3); int __ovld __cnfn all(short4); int __ovld __cnfn all(short8); int __ovld __cnfn all(short16); int __ovld __cnfn all(int); int __ovld __cnfn all(int2); int __ovld __cnfn all(int3); int __ovld __cnfn all(int4); int __ovld __cnfn all(int8); int __ovld __cnfn all(int16); int __ovld __cnfn all(long); int __ovld __cnfn all(long2); int __ovld __cnfn all(long3); int __ovld __cnfn all(long4); int __ovld __cnfn all(long8); int __ovld __cnfn all(long16); /** * Each bit of the result is the corresponding bit of a if * the corresponding bit of c is 0. Otherwise it is the * corresponding bit of b. */ char __ovld __cnfn bitselect(char, char, char); uchar __ovld __cnfn bitselect(uchar, uchar, uchar); char2 __ovld __cnfn bitselect(char2, char2, char2); uchar2 __ovld __cnfn bitselect(uchar2, uchar2, uchar2); char3 __ovld __cnfn bitselect(char3, char3, char3); uchar3 __ovld __cnfn bitselect(uchar3, uchar3, uchar3); char4 __ovld __cnfn bitselect(char4, char4, char4); uchar4 __ovld __cnfn bitselect(uchar4, uchar4, uchar4); char8 __ovld __cnfn bitselect(char8, char8, char8); uchar8 __ovld __cnfn bitselect(uchar8, uchar8, uchar8); char16 __ovld __cnfn bitselect(char16, char16, char16); uchar16 __ovld __cnfn bitselect(uchar16, uchar16, uchar16); short __ovld __cnfn bitselect(short, short, short); ushort __ovld __cnfn bitselect(ushort, ushort, ushort); short2 __ovld __cnfn bitselect(short2, short2, short2); ushort2 __ovld __cnfn bitselect(ushort2, ushort2, ushort2); short3 __ovld __cnfn bitselect(short3, short3, short3); ushort3 __ovld __cnfn bitselect(ushort3, ushort3, ushort3); short4 __ovld __cnfn bitselect(short4, short4, short4); ushort4 __ovld __cnfn bitselect(ushort4, ushort4, ushort4); short8 __ovld __cnfn 
bitselect(short8, short8, short8); ushort8 __ovld __cnfn bitselect(ushort8, ushort8, ushort8); short16 __ovld __cnfn bitselect(short16, short16, short16); ushort16 __ovld __cnfn bitselect(ushort16, ushort16, ushort16); int __ovld __cnfn bitselect(int, int, int); uint __ovld __cnfn bitselect(uint, uint, uint); int2 __ovld __cnfn bitselect(int2, int2, int2); uint2 __ovld __cnfn bitselect(uint2, uint2, uint2); int3 __ovld __cnfn bitselect(int3, int3, int3); uint3 __ovld __cnfn bitselect(uint3, uint3, uint3); int4 __ovld __cnfn bitselect(int4, int4, int4); uint4 __ovld __cnfn bitselect(uint4, uint4, uint4); int8 __ovld __cnfn bitselect(int8, int8, int8); uint8 __ovld __cnfn bitselect(uint8, uint8, uint8); int16 __ovld __cnfn bitselect(int16, int16, int16); uint16 __ovld __cnfn bitselect(uint16, uint16, uint16); long __ovld __cnfn bitselect(long, long, long); ulong __ovld __cnfn bitselect(ulong, ulong, ulong); long2 __ovld __cnfn bitselect(long2, long2, long2); ulong2 __ovld __cnfn bitselect(ulong2, ulong2, ulong2); long3 __ovld __cnfn bitselect(long3, long3, long3); ulong3 __ovld __cnfn bitselect(ulong3, ulong3, ulong3); long4 __ovld __cnfn bitselect(long4, long4, long4); ulong4 __ovld __cnfn bitselect(ulong4, ulong4, ulong4); long8 __ovld __cnfn bitselect(long8, long8, long8); ulong8 __ovld __cnfn bitselect(ulong8, ulong8, ulong8); long16 __ovld __cnfn bitselect(long16, long16, long16); ulong16 __ovld __cnfn bitselect(ulong16, ulong16, ulong16); float __ovld __cnfn bitselect(float, float, float); float2 __ovld __cnfn bitselect(float2, float2, float2); float3 __ovld __cnfn bitselect(float3, float3, float3); float4 __ovld __cnfn bitselect(float4, float4, float4); float8 __ovld __cnfn bitselect(float8, float8, float8); float16 __ovld __cnfn bitselect(float16, float16, float16); #ifdef cl_khr_fp64 double __ovld __cnfn bitselect(double, double, double); double2 __ovld __cnfn bitselect(double2, double2, double2); double3 __ovld __cnfn bitselect(double3, double3, double3); 
double4 __ovld __cnfn bitselect(double4, double4, double4); double8 __ovld __cnfn bitselect(double8, double8, double8); double16 __ovld __cnfn bitselect(double16, double16, double16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn bitselect(half, half, half); half2 __ovld __cnfn bitselect(half2, half2, half2); half3 __ovld __cnfn bitselect(half3, half3, half3); half4 __ovld __cnfn bitselect(half4, half4, half4); half8 __ovld __cnfn bitselect(half8, half8, half8); half16 __ovld __cnfn bitselect(half16, half16, half16); #endif //cl_khr_fp16 /** * For each component of a vector type, * result[i] = if MSB of c[i] is set ? b[i] : a[i]. * For a scalar type, result = c ? b : a. * b and a must have the same type. * c must have the same number of elements and bits as a. */ char __ovld __cnfn select(char, char, char); uchar __ovld __cnfn select(uchar, uchar, char); char2 __ovld __cnfn select(char2, char2, char2); uchar2 __ovld __cnfn select(uchar2, uchar2, char2); char3 __ovld __cnfn select(char3, char3, char3); uchar3 __ovld __cnfn select(uchar3, uchar3, char3); char4 __ovld __cnfn select(char4, char4, char4); uchar4 __ovld __cnfn select(uchar4, uchar4, char4); char8 __ovld __cnfn select(char8, char8, char8); uchar8 __ovld __cnfn select(uchar8, uchar8, char8); char16 __ovld __cnfn select(char16, char16, char16); uchar16 __ovld __cnfn select(uchar16, uchar16, char16); short __ovld __cnfn select(short, short, short); ushort __ovld __cnfn select(ushort, ushort, short); short2 __ovld __cnfn select(short2, short2, short2); ushort2 __ovld __cnfn select(ushort2, ushort2, short2); short3 __ovld __cnfn select(short3, short3, short3); ushort3 __ovld __cnfn select(ushort3, ushort3, short3); short4 __ovld __cnfn select(short4, short4, short4); ushort4 __ovld __cnfn select(ushort4, ushort4, short4); short8 __ovld __cnfn select(short8, short8, short8); ushort8 __ovld __cnfn select(ushort8, ushort8, short8); short16 __ovld __cnfn select(short16, short16, short16); ushort16 
__ovld __cnfn select(ushort16, ushort16, short16); int __ovld __cnfn select(int, int, int); uint __ovld __cnfn select(uint, uint, int); int2 __ovld __cnfn select(int2, int2, int2); uint2 __ovld __cnfn select(uint2, uint2, int2); int3 __ovld __cnfn select(int3, int3, int3); uint3 __ovld __cnfn select(uint3, uint3, int3); int4 __ovld __cnfn select(int4, int4, int4); uint4 __ovld __cnfn select(uint4, uint4, int4); int8 __ovld __cnfn select(int8, int8, int8); uint8 __ovld __cnfn select(uint8, uint8, int8); int16 __ovld __cnfn select(int16, int16, int16); uint16 __ovld __cnfn select(uint16, uint16, int16); float __ovld __cnfn select(float, float, int); float2 __ovld __cnfn select(float2, float2, int2); float3 __ovld __cnfn select(float3, float3, int3); float4 __ovld __cnfn select(float4, float4, int4); float8 __ovld __cnfn select(float8, float8, int8); float16 __ovld __cnfn select(float16, float16, int16); long __ovld __cnfn select(long, long, long); ulong __ovld __cnfn select(ulong, ulong, long); long2 __ovld __cnfn select(long2, long2, long2); ulong2 __ovld __cnfn select(ulong2, ulong2, long2); long3 __ovld __cnfn select(long3, long3, long3); ulong3 __ovld __cnfn select(ulong3, ulong3, long3); long4 __ovld __cnfn select(long4, long4, long4); ulong4 __ovld __cnfn select(ulong4, ulong4, long4); long8 __ovld __cnfn select(long8, long8, long8); ulong8 __ovld __cnfn select(ulong8, ulong8, long8); long16 __ovld __cnfn select(long16, long16, long16); ulong16 __ovld __cnfn select(ulong16, ulong16, long16); char __ovld __cnfn select(char, char, uchar); uchar __ovld __cnfn select(uchar, uchar, uchar); char2 __ovld __cnfn select(char2, char2, uchar2); uchar2 __ovld __cnfn select(uchar2, uchar2, uchar2); char3 __ovld __cnfn select(char3, char3, uchar3); uchar3 __ovld __cnfn select(uchar3, uchar3, uchar3); char4 __ovld __cnfn select(char4, char4, uchar4); uchar4 __ovld __cnfn select(uchar4, uchar4, uchar4); char8 __ovld __cnfn select(char8, char8, uchar8); uchar8 __ovld __cnfn 
select(uchar8, uchar8, uchar8); char16 __ovld __cnfn select(char16, char16, uchar16); uchar16 __ovld __cnfn select(uchar16, uchar16, uchar16); short __ovld __cnfn select(short, short, ushort); ushort __ovld __cnfn select(ushort, ushort, ushort); short2 __ovld __cnfn select(short2, short2, ushort2); ushort2 __ovld __cnfn select(ushort2, ushort2, ushort2); short3 __ovld __cnfn select(short3, short3, ushort3); ushort3 __ovld __cnfn select(ushort3, ushort3, ushort3); short4 __ovld __cnfn select(short4, short4, ushort4); ushort4 __ovld __cnfn select(ushort4, ushort4, ushort4); short8 __ovld __cnfn select(short8, short8, ushort8); ushort8 __ovld __cnfn select(ushort8, ushort8, ushort8); short16 __ovld __cnfn select(short16, short16, ushort16); ushort16 __ovld __cnfn select(ushort16, ushort16, ushort16); int __ovld __cnfn select(int, int, uint); uint __ovld __cnfn select(uint, uint, uint); int2 __ovld __cnfn select(int2, int2, uint2); uint2 __ovld __cnfn select(uint2, uint2, uint2); int3 __ovld __cnfn select(int3, int3, uint3); uint3 __ovld __cnfn select(uint3, uint3, uint3); int4 __ovld __cnfn select(int4, int4, uint4); uint4 __ovld __cnfn select(uint4, uint4, uint4); int8 __ovld __cnfn select(int8, int8, uint8); uint8 __ovld __cnfn select(uint8, uint8, uint8); int16 __ovld __cnfn select(int16, int16, uint16); uint16 __ovld __cnfn select(uint16, uint16, uint16); float __ovld __cnfn select(float, float, uint); float2 __ovld __cnfn select(float2, float2, uint2); float3 __ovld __cnfn select(float3, float3, uint3); float4 __ovld __cnfn select(float4, float4, uint4); float8 __ovld __cnfn select(float8, float8, uint8); float16 __ovld __cnfn select(float16, float16, uint16); long __ovld __cnfn select(long, long, ulong); ulong __ovld __cnfn select(ulong, ulong, ulong); long2 __ovld __cnfn select(long2, long2, ulong2); ulong2 __ovld __cnfn select(ulong2, ulong2, ulong2); long3 __ovld __cnfn select(long3, long3, ulong3); ulong3 __ovld __cnfn select(ulong3, ulong3, ulong3); long4 
__ovld __cnfn select(long4, long4, ulong4); ulong4 __ovld __cnfn select(ulong4, ulong4, ulong4); long8 __ovld __cnfn select(long8, long8, ulong8); ulong8 __ovld __cnfn select(ulong8, ulong8, ulong8); long16 __ovld __cnfn select(long16, long16, ulong16); ulong16 __ovld __cnfn select(ulong16, ulong16, ulong16); #ifdef cl_khr_fp64 double __ovld __cnfn select(double, double, long); double2 __ovld __cnfn select(double2, double2, long2); double3 __ovld __cnfn select(double3, double3, long3); double4 __ovld __cnfn select(double4, double4, long4); double8 __ovld __cnfn select(double8, double8, long8); double16 __ovld __cnfn select(double16, double16, long16); double __ovld __cnfn select(double, double, ulong); double2 __ovld __cnfn select(double2, double2, ulong2); double3 __ovld __cnfn select(double3, double3, ulong3); double4 __ovld __cnfn select(double4, double4, ulong4); double8 __ovld __cnfn select(double8, double8, ulong8); double16 __ovld __cnfn select(double16, double16, ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __cnfn select(half, half, short); half2 __ovld __cnfn select(half2, half2, short2); half3 __ovld __cnfn select(half3, half3, short3); half4 __ovld __cnfn select(half4, half4, short4); half8 __ovld __cnfn select(half8, half8, short8); half16 __ovld __cnfn select(half16, half16, short16); half __ovld __cnfn select(half, half, ushort); half2 __ovld __cnfn select(half2, half2, ushort2); half3 __ovld __cnfn select(half3, half3, ushort3); half4 __ovld __cnfn select(half4, half4, ushort4); half8 __ovld __cnfn select(half8, half8, ushort8); half16 __ovld __cnfn select(half16, half16, ushort16); #endif //cl_khr_fp16 // OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions // OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type /** * Use generic type gentype to indicate the built-in data types * char, uchar, short, ushort, int, uint, long, ulong, float, * 
double or half. * * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)). * * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)). * * The address computed as (p + (offset * n)) must be * 8-bit aligned if gentype is char, uchar; * 16-bit aligned if gentype is short, ushort, half; * 32-bit aligned if gentype is int, uint, float; * 64-bit aligned if gentype is long, ulong, double. */ char2 __ovld __purefn vload2(size_t, const __constant char *); uchar2 __ovld __purefn vload2(size_t, const __constant uchar *); short2 __ovld __purefn vload2(size_t, const __constant short *); ushort2 __ovld __purefn vload2(size_t, const __constant ushort *); int2 __ovld __purefn vload2(size_t, const __constant int *); uint2 __ovld __purefn vload2(size_t, const __constant uint *); long2 __ovld __purefn vload2(size_t, const __constant long *); ulong2 __ovld __purefn vload2(size_t, const __constant ulong *); float2 __ovld __purefn vload2(size_t, const __constant float *); char3 __ovld __purefn vload3(size_t, const __constant char *); uchar3 __ovld __purefn vload3(size_t, const __constant uchar *); short3 __ovld __purefn vload3(size_t, const __constant short *); ushort3 __ovld __purefn vload3(size_t, const __constant ushort *); int3 __ovld __purefn vload3(size_t, const __constant int *); uint3 __ovld __purefn vload3(size_t, const __constant uint *); long3 __ovld __purefn vload3(size_t, const __constant long *); ulong3 __ovld __purefn vload3(size_t, const __constant ulong *); float3 __ovld __purefn vload3(size_t, const __constant float *); char4 __ovld __purefn vload4(size_t, const __constant char *); uchar4 __ovld __purefn vload4(size_t, const __constant uchar *); short4 __ovld __purefn vload4(size_t, const __constant short *); ushort4 __ovld __purefn vload4(size_t, const __constant ushort *); int4 __ovld __purefn vload4(size_t, const __constant int *); uint4 __ovld __purefn vload4(size_t, const __constant uint *); long4 __ovld 
__purefn vload4(size_t, const __constant long *); ulong4 __ovld __purefn vload4(size_t, const __constant ulong *); float4 __ovld __purefn vload4(size_t, const __constant float *); char8 __ovld __purefn vload8(size_t, const __constant char *); uchar8 __ovld __purefn vload8(size_t, const __constant uchar *); short8 __ovld __purefn vload8(size_t, const __constant short *); ushort8 __ovld __purefn vload8(size_t, const __constant ushort *); int8 __ovld __purefn vload8(size_t, const __constant int *); uint8 __ovld __purefn vload8(size_t, const __constant uint *); long8 __ovld __purefn vload8(size_t, const __constant long *); ulong8 __ovld __purefn vload8(size_t, const __constant ulong *); float8 __ovld __purefn vload8(size_t, const __constant float *); char16 __ovld __purefn vload16(size_t, const __constant char *); uchar16 __ovld __purefn vload16(size_t, const __constant uchar *); short16 __ovld __purefn vload16(size_t, const __constant short *); ushort16 __ovld __purefn vload16(size_t, const __constant ushort *); int16 __ovld __purefn vload16(size_t, const __constant int *); uint16 __ovld __purefn vload16(size_t, const __constant uint *); long16 __ovld __purefn vload16(size_t, const __constant long *); ulong16 __ovld __purefn vload16(size_t, const __constant ulong *); float16 __ovld __purefn vload16(size_t, const __constant float *); #ifdef cl_khr_fp64 double2 __ovld __purefn vload2(size_t, const __constant double *); double3 __ovld __purefn vload3(size_t, const __constant double *); double4 __ovld __purefn vload4(size_t, const __constant double *); double8 __ovld __purefn vload8(size_t, const __constant double *); double16 __ovld __purefn vload16(size_t, const __constant double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __purefn vload2(size_t, const __constant half *); half3 __ovld __purefn vload3(size_t, const __constant half *); half4 __ovld __purefn vload4(size_t, const __constant half *); half8 __ovld __purefn vload8(size_t, const __constant half *); 
half16 __ovld __purefn vload16(size_t, const __constant half *); #endif //cl_khr_fp16 #if defined(__opencl_c_generic_address_space) char2 __ovld __purefn vload2(size_t, const char *); uchar2 __ovld __purefn vload2(size_t, const uchar *); short2 __ovld __purefn vload2(size_t, const short *); ushort2 __ovld __purefn vload2(size_t, const ushort *); int2 __ovld __purefn vload2(size_t, const int *); uint2 __ovld __purefn vload2(size_t, const uint *); long2 __ovld __purefn vload2(size_t, const long *); ulong2 __ovld __purefn vload2(size_t, const ulong *); float2 __ovld __purefn vload2(size_t, const float *); char3 __ovld __purefn vload3(size_t, const char *); uchar3 __ovld __purefn vload3(size_t, const uchar *); short3 __ovld __purefn vload3(size_t, const short *); ushort3 __ovld __purefn vload3(size_t, const ushort *); int3 __ovld __purefn vload3(size_t, const int *); uint3 __ovld __purefn vload3(size_t, const uint *); long3 __ovld __purefn vload3(size_t, const long *); ulong3 __ovld __purefn vload3(size_t, const ulong *); float3 __ovld __purefn vload3(size_t, const float *); char4 __ovld __purefn vload4(size_t, const char *); uchar4 __ovld __purefn vload4(size_t, const uchar *); short4 __ovld __purefn vload4(size_t, const short *); ushort4 __ovld __purefn vload4(size_t, const ushort *); int4 __ovld __purefn vload4(size_t, const int *); uint4 __ovld __purefn vload4(size_t, const uint *); long4 __ovld __purefn vload4(size_t, const long *); ulong4 __ovld __purefn vload4(size_t, const ulong *); float4 __ovld __purefn vload4(size_t, const float *); char8 __ovld __purefn vload8(size_t, const char *); uchar8 __ovld __purefn vload8(size_t, const uchar *); short8 __ovld __purefn vload8(size_t, const short *); ushort8 __ovld __purefn vload8(size_t, const ushort *); int8 __ovld __purefn vload8(size_t, const int *); uint8 __ovld __purefn vload8(size_t, const uint *); long8 __ovld __purefn vload8(size_t, const long *); ulong8 __ovld __purefn vload8(size_t, const ulong *); float8 
__ovld __purefn vload8(size_t, const float *); char16 __ovld __purefn vload16(size_t, const char *); uchar16 __ovld __purefn vload16(size_t, const uchar *); short16 __ovld __purefn vload16(size_t, const short *); ushort16 __ovld __purefn vload16(size_t, const ushort *); int16 __ovld __purefn vload16(size_t, const int *); uint16 __ovld __purefn vload16(size_t, const uint *); long16 __ovld __purefn vload16(size_t, const long *); ulong16 __ovld __purefn vload16(size_t, const ulong *); float16 __ovld __purefn vload16(size_t, const float *); #ifdef cl_khr_fp64 double2 __ovld __purefn vload2(size_t, const double *); double3 __ovld __purefn vload3(size_t, const double *); double4 __ovld __purefn vload4(size_t, const double *); double8 __ovld __purefn vload8(size_t, const double *); double16 __ovld __purefn vload16(size_t, const double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __purefn vload2(size_t, const half *); half3 __ovld __purefn vload3(size_t, const half *); half4 __ovld __purefn vload4(size_t, const half *); half8 __ovld __purefn vload8(size_t, const half *); half16 __ovld __purefn vload16(size_t, const half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) char2 __ovld __purefn vload2(size_t, const __global char *); uchar2 __ovld __purefn vload2(size_t, const __global uchar *); short2 __ovld __purefn vload2(size_t, const __global short *); ushort2 __ovld __purefn vload2(size_t, const __global ushort *); int2 __ovld __purefn vload2(size_t, const __global int *); uint2 __ovld __purefn vload2(size_t, const __global uint *); long2 __ovld __purefn vload2(size_t, const __global long *); ulong2 __ovld __purefn vload2(size_t, const __global ulong *); float2 __ovld __purefn vload2(size_t, const __global float *); char3 __ovld __purefn vload3(size_t, const __global char *); uchar3 __ovld __purefn vload3(size_t, const __global uchar *); short3 __ovld __purefn vload3(size_t, const 
__global short *); ushort3 __ovld __purefn vload3(size_t, const __global ushort *); int3 __ovld __purefn vload3(size_t, const __global int *); uint3 __ovld __purefn vload3(size_t, const __global uint *); long3 __ovld __purefn vload3(size_t, const __global long *); ulong3 __ovld __purefn vload3(size_t, const __global ulong *); float3 __ovld __purefn vload3(size_t, const __global float *); char4 __ovld __purefn vload4(size_t, const __global char *); uchar4 __ovld __purefn vload4(size_t, const __global uchar *); short4 __ovld __purefn vload4(size_t, const __global short *); ushort4 __ovld __purefn vload4(size_t, const __global ushort *); int4 __ovld __purefn vload4(size_t, const __global int *); uint4 __ovld __purefn vload4(size_t, const __global uint *); long4 __ovld __purefn vload4(size_t, const __global long *); ulong4 __ovld __purefn vload4(size_t, const __global ulong *); float4 __ovld __purefn vload4(size_t, const __global float *); char8 __ovld __purefn vload8(size_t, const __global char *); uchar8 __ovld __purefn vload8(size_t, const __global uchar *); short8 __ovld __purefn vload8(size_t, const __global short *); ushort8 __ovld __purefn vload8(size_t, const __global ushort *); int8 __ovld __purefn vload8(size_t, const __global int *); uint8 __ovld __purefn vload8(size_t, const __global uint *); long8 __ovld __purefn vload8(size_t, const __global long *); ulong8 __ovld __purefn vload8(size_t, const __global ulong *); float8 __ovld __purefn vload8(size_t, const __global float *); char16 __ovld __purefn vload16(size_t, const __global char *); uchar16 __ovld __purefn vload16(size_t, const __global uchar *); short16 __ovld __purefn vload16(size_t, const __global short *); ushort16 __ovld __purefn vload16(size_t, const __global ushort *); int16 __ovld __purefn vload16(size_t, const __global int *); uint16 __ovld __purefn vload16(size_t, const __global uint *); long16 __ovld __purefn vload16(size_t, const __global long *); ulong16 __ovld __purefn vload16(size_t, 
const __global ulong *); float16 __ovld __purefn vload16(size_t, const __global float *); char2 __ovld __purefn vload2(size_t, const __local char *); uchar2 __ovld __purefn vload2(size_t, const __local uchar *); short2 __ovld __purefn vload2(size_t, const __local short *); ushort2 __ovld __purefn vload2(size_t, const __local ushort *); int2 __ovld __purefn vload2(size_t, const __local int *); uint2 __ovld __purefn vload2(size_t, const __local uint *); long2 __ovld __purefn vload2(size_t, const __local long *); ulong2 __ovld __purefn vload2(size_t, const __local ulong *); float2 __ovld __purefn vload2(size_t, const __local float *); char3 __ovld __purefn vload3(size_t, const __local char *); uchar3 __ovld __purefn vload3(size_t, const __local uchar *); short3 __ovld __purefn vload3(size_t, const __local short *); ushort3 __ovld __purefn vload3(size_t, const __local ushort *); int3 __ovld __purefn vload3(size_t, const __local int *); uint3 __ovld __purefn vload3(size_t, const __local uint *); long3 __ovld __purefn vload3(size_t, const __local long *); ulong3 __ovld __purefn vload3(size_t, const __local ulong *); float3 __ovld __purefn vload3(size_t, const __local float *); char4 __ovld __purefn vload4(size_t, const __local char *); uchar4 __ovld __purefn vload4(size_t, const __local uchar *); short4 __ovld __purefn vload4(size_t, const __local short *); ushort4 __ovld __purefn vload4(size_t, const __local ushort *); int4 __ovld __purefn vload4(size_t, const __local int *); uint4 __ovld __purefn vload4(size_t, const __local uint *); long4 __ovld __purefn vload4(size_t, const __local long *); ulong4 __ovld __purefn vload4(size_t, const __local ulong *); float4 __ovld __purefn vload4(size_t, const __local float *); char8 __ovld __purefn vload8(size_t, const __local char *); uchar8 __ovld __purefn vload8(size_t, const __local uchar *); short8 __ovld __purefn vload8(size_t, const __local short *); ushort8 __ovld __purefn vload8(size_t, const __local ushort *); int8 __ovld 
__purefn vload8(size_t, const __local int *); uint8 __ovld __purefn vload8(size_t, const __local uint *); long8 __ovld __purefn vload8(size_t, const __local long *); ulong8 __ovld __purefn vload8(size_t, const __local ulong *); float8 __ovld __purefn vload8(size_t, const __local float *); char16 __ovld __purefn vload16(size_t, const __local char *); uchar16 __ovld __purefn vload16(size_t, const __local uchar *); short16 __ovld __purefn vload16(size_t, const __local short *); ushort16 __ovld __purefn vload16(size_t, const __local ushort *); int16 __ovld __purefn vload16(size_t, const __local int *); uint16 __ovld __purefn vload16(size_t, const __local uint *); long16 __ovld __purefn vload16(size_t, const __local long *); ulong16 __ovld __purefn vload16(size_t, const __local ulong *); float16 __ovld __purefn vload16(size_t, const __local float *); char2 __ovld __purefn vload2(size_t, const __private char *); uchar2 __ovld __purefn vload2(size_t, const __private uchar *); short2 __ovld __purefn vload2(size_t, const __private short *); ushort2 __ovld __purefn vload2(size_t, const __private ushort *); int2 __ovld __purefn vload2(size_t, const __private int *); uint2 __ovld __purefn vload2(size_t, const __private uint *); long2 __ovld __purefn vload2(size_t, const __private long *); ulong2 __ovld __purefn vload2(size_t, const __private ulong *); float2 __ovld __purefn vload2(size_t, const __private float *); char3 __ovld __purefn vload3(size_t, const __private char *); uchar3 __ovld __purefn vload3(size_t, const __private uchar *); short3 __ovld __purefn vload3(size_t, const __private short *); ushort3 __ovld __purefn vload3(size_t, const __private ushort *); int3 __ovld __purefn vload3(size_t, const __private int *); uint3 __ovld __purefn vload3(size_t, const __private uint *); long3 __ovld __purefn vload3(size_t, const __private long *); ulong3 __ovld __purefn vload3(size_t, const __private ulong *); float3 __ovld __purefn vload3(size_t, const __private float *); char4 
__ovld __purefn vload4(size_t, const __private char *); uchar4 __ovld __purefn vload4(size_t, const __private uchar *); short4 __ovld __purefn vload4(size_t, const __private short *); ushort4 __ovld __purefn vload4(size_t, const __private ushort *); int4 __ovld __purefn vload4(size_t, const __private int *); uint4 __ovld __purefn vload4(size_t, const __private uint *); long4 __ovld __purefn vload4(size_t, const __private long *); ulong4 __ovld __purefn vload4(size_t, const __private ulong *); float4 __ovld __purefn vload4(size_t, const __private float *); char8 __ovld __purefn vload8(size_t, const __private char *); uchar8 __ovld __purefn vload8(size_t, const __private uchar *); short8 __ovld __purefn vload8(size_t, const __private short *); ushort8 __ovld __purefn vload8(size_t, const __private ushort *); int8 __ovld __purefn vload8(size_t, const __private int *); uint8 __ovld __purefn vload8(size_t, const __private uint *); long8 __ovld __purefn vload8(size_t, const __private long *); ulong8 __ovld __purefn vload8(size_t, const __private ulong *); float8 __ovld __purefn vload8(size_t, const __private float *); char16 __ovld __purefn vload16(size_t, const __private char *); uchar16 __ovld __purefn vload16(size_t, const __private uchar *); short16 __ovld __purefn vload16(size_t, const __private short *); ushort16 __ovld __purefn vload16(size_t, const __private ushort *); int16 __ovld __purefn vload16(size_t, const __private int *); uint16 __ovld __purefn vload16(size_t, const __private uint *); long16 __ovld __purefn vload16(size_t, const __private long *); ulong16 __ovld __purefn vload16(size_t, const __private ulong *); float16 __ovld __purefn vload16(size_t, const __private float *); #ifdef cl_khr_fp64 double2 __ovld __purefn vload2(size_t, const __global double *); double3 __ovld __purefn vload3(size_t, const __global double *); double4 __ovld __purefn vload4(size_t, const __global double *); double8 __ovld __purefn vload8(size_t, const __global double *); 
double16 __ovld __purefn vload16(size_t, const __global double *); double2 __ovld __purefn vload2(size_t, const __local double *); double3 __ovld __purefn vload3(size_t, const __local double *); double4 __ovld __purefn vload4(size_t, const __local double *); double8 __ovld __purefn vload8(size_t, const __local double *); double16 __ovld __purefn vload16(size_t, const __local double *); double2 __ovld __purefn vload2(size_t, const __private double *); double3 __ovld __purefn vload3(size_t, const __private double *); double4 __ovld __purefn vload4(size_t, const __private double *); double8 __ovld __purefn vload8(size_t, const __private double *); double16 __ovld __purefn vload16(size_t, const __private double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __purefn vload2(size_t, const __global half *); half3 __ovld __purefn vload3(size_t, const __global half *); half4 __ovld __purefn vload4(size_t, const __global half *); half8 __ovld __purefn vload8(size_t, const __global half *); half16 __ovld __purefn vload16(size_t, const __global half *); half2 __ovld __purefn vload2(size_t, const __local half *); half3 __ovld __purefn vload3(size_t, const __local half *); half4 __ovld __purefn vload4(size_t, const __local half *); half8 __ovld __purefn vload8(size_t, const __local half *); half16 __ovld __purefn vload16(size_t, const __local half *); half2 __ovld __purefn vload2(size_t, const __private half *); half3 __ovld __purefn vload3(size_t, const __private half *); half4 __ovld __purefn vload4(size_t, const __private half *); half8 __ovld __purefn vload8(size_t, const __private half *); half16 __ovld __purefn vload16(size_t, const __private half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) #if defined(__opencl_c_generic_address_space) void __ovld vstore2(char2, size_t, char *); void __ovld vstore2(uchar2, size_t, uchar *); void __ovld vstore2(short2, size_t, short *); void __ovld vstore2(ushort2, size_t, ushort *); void 
__ovld vstore2(int2, size_t, int *); void __ovld vstore2(uint2, size_t, uint *); void __ovld vstore2(long2, size_t, long *); void __ovld vstore2(ulong2, size_t, ulong *); void __ovld vstore2(float2, size_t, float *); void __ovld vstore3(char3, size_t, char *); void __ovld vstore3(uchar3, size_t, uchar *); void __ovld vstore3(short3, size_t, short *); void __ovld vstore3(ushort3, size_t, ushort *); void __ovld vstore3(int3, size_t, int *); void __ovld vstore3(uint3, size_t, uint *); void __ovld vstore3(long3, size_t, long *); void __ovld vstore3(ulong3, size_t, ulong *); void __ovld vstore3(float3, size_t, float *); void __ovld vstore4(char4, size_t, char *); void __ovld vstore4(uchar4, size_t, uchar *); void __ovld vstore4(short4, size_t, short *); void __ovld vstore4(ushort4, size_t, ushort *); void __ovld vstore4(int4, size_t, int *); void __ovld vstore4(uint4, size_t, uint *); void __ovld vstore4(long4, size_t, long *); void __ovld vstore4(ulong4, size_t, ulong *); void __ovld vstore4(float4, size_t, float *); void __ovld vstore8(char8, size_t, char *); void __ovld vstore8(uchar8, size_t, uchar *); void __ovld vstore8(short8, size_t, short *); void __ovld vstore8(ushort8, size_t, ushort *); void __ovld vstore8(int8, size_t, int *); void __ovld vstore8(uint8, size_t, uint *); void __ovld vstore8(long8, size_t, long *); void __ovld vstore8(ulong8, size_t, ulong *); void __ovld vstore8(float8, size_t, float *); void __ovld vstore16(char16, size_t, char *); void __ovld vstore16(uchar16, size_t, uchar *); void __ovld vstore16(short16, size_t, short *); void __ovld vstore16(ushort16, size_t, ushort *); void __ovld vstore16(int16, size_t, int *); void __ovld vstore16(uint16, size_t, uint *); void __ovld vstore16(long16, size_t, long *); void __ovld vstore16(ulong16, size_t, ulong *); void __ovld vstore16(float16, size_t, float *); #ifdef cl_khr_fp64 void __ovld vstore2(double2, size_t, double *); void __ovld vstore3(double3, size_t, double *); void __ovld 
vstore4(double4, size_t, double *); void __ovld vstore8(double8, size_t, double *); void __ovld vstore16(double16, size_t, double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 void __ovld vstore2(half2, size_t, half *); void __ovld vstore3(half3, size_t, half *); void __ovld vstore4(half4, size_t, half *); void __ovld vstore8(half8, size_t, half *); void __ovld vstore16(half16, size_t, half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstore2(char2, size_t, __global char *); void __ovld vstore2(uchar2, size_t, __global uchar *); void __ovld vstore2(short2, size_t, __global short *); void __ovld vstore2(ushort2, size_t, __global ushort *); void __ovld vstore2(int2, size_t, __global int *); void __ovld vstore2(uint2, size_t, __global uint *); void __ovld vstore2(long2, size_t, __global long *); void __ovld vstore2(ulong2, size_t, __global ulong *); void __ovld vstore2(float2, size_t, __global float *); void __ovld vstore3(char3, size_t, __global char *); void __ovld vstore3(uchar3, size_t, __global uchar *); void __ovld vstore3(short3, size_t, __global short *); void __ovld vstore3(ushort3, size_t, __global ushort *); void __ovld vstore3(int3, size_t, __global int *); void __ovld vstore3(uint3, size_t, __global uint *); void __ovld vstore3(long3, size_t, __global long *); void __ovld vstore3(ulong3, size_t, __global ulong *); void __ovld vstore3(float3, size_t, __global float *); void __ovld vstore4(char4, size_t, __global char *); void __ovld vstore4(uchar4, size_t, __global uchar *); void __ovld vstore4(short4, size_t, __global short *); void __ovld vstore4(ushort4, size_t, __global ushort *); void __ovld vstore4(int4, size_t, __global int *); void __ovld vstore4(uint4, size_t, __global uint *); void __ovld vstore4(long4, size_t, __global long *); void __ovld vstore4(ulong4, size_t, __global ulong *); void __ovld vstore4(float4, size_t, __global float *); void __ovld 
vstore8(char8, size_t, __global char *); void __ovld vstore8(uchar8, size_t, __global uchar *); void __ovld vstore8(short8, size_t, __global short *); void __ovld vstore8(ushort8, size_t, __global ushort *); void __ovld vstore8(int8, size_t, __global int *); void __ovld vstore8(uint8, size_t, __global uint *); void __ovld vstore8(long8, size_t, __global long *); void __ovld vstore8(ulong8, size_t, __global ulong *); void __ovld vstore8(float8, size_t, __global float *); void __ovld vstore16(char16, size_t, __global char *); void __ovld vstore16(uchar16, size_t, __global uchar *); void __ovld vstore16(short16, size_t, __global short *); void __ovld vstore16(ushort16, size_t, __global ushort *); void __ovld vstore16(int16, size_t, __global int *); void __ovld vstore16(uint16, size_t, __global uint *); void __ovld vstore16(long16, size_t, __global long *); void __ovld vstore16(ulong16, size_t, __global ulong *); void __ovld vstore16(float16, size_t, __global float *); void __ovld vstore2(char2, size_t, __local char *); void __ovld vstore2(uchar2, size_t, __local uchar *); void __ovld vstore2(short2, size_t, __local short *); void __ovld vstore2(ushort2, size_t, __local ushort *); void __ovld vstore2(int2, size_t, __local int *); void __ovld vstore2(uint2, size_t, __local uint *); void __ovld vstore2(long2, size_t, __local long *); void __ovld vstore2(ulong2, size_t, __local ulong *); void __ovld vstore2(float2, size_t, __local float *); void __ovld vstore3(char3, size_t, __local char *); void __ovld vstore3(uchar3, size_t, __local uchar *); void __ovld vstore3(short3, size_t, __local short *); void __ovld vstore3(ushort3, size_t, __local ushort *); void __ovld vstore3(int3, size_t, __local int *); void __ovld vstore3(uint3, size_t, __local uint *); void __ovld vstore3(long3, size_t, __local long *); void __ovld vstore3(ulong3, size_t, __local ulong *); void __ovld vstore3(float3, size_t, __local float *); void __ovld vstore4(char4, size_t, __local char *); void __ovld 
vstore4(uchar4, size_t, __local uchar *); void __ovld vstore4(short4, size_t, __local short *); void __ovld vstore4(ushort4, size_t, __local ushort *); void __ovld vstore4(int4, size_t, __local int *); void __ovld vstore4(uint4, size_t, __local uint *); void __ovld vstore4(long4, size_t, __local long *); void __ovld vstore4(ulong4, size_t, __local ulong *); void __ovld vstore4(float4, size_t, __local float *); void __ovld vstore8(char8, size_t, __local char *); void __ovld vstore8(uchar8, size_t, __local uchar *); void __ovld vstore8(short8, size_t, __local short *); void __ovld vstore8(ushort8, size_t, __local ushort *); void __ovld vstore8(int8, size_t, __local int *); void __ovld vstore8(uint8, size_t, __local uint *); void __ovld vstore8(long8, size_t, __local long *); void __ovld vstore8(ulong8, size_t, __local ulong *); void __ovld vstore8(float8, size_t, __local float *); void __ovld vstore16(char16, size_t, __local char *); void __ovld vstore16(uchar16, size_t, __local uchar *); void __ovld vstore16(short16, size_t, __local short *); void __ovld vstore16(ushort16, size_t, __local ushort *); void __ovld vstore16(int16, size_t, __local int *); void __ovld vstore16(uint16, size_t, __local uint *); void __ovld vstore16(long16, size_t, __local long *); void __ovld vstore16(ulong16, size_t, __local ulong *); void __ovld vstore16(float16, size_t, __local float *); void __ovld vstore2(char2, size_t, __private char *); void __ovld vstore2(uchar2, size_t, __private uchar *); void __ovld vstore2(short2, size_t, __private short *); void __ovld vstore2(ushort2, size_t, __private ushort *); void __ovld vstore2(int2, size_t, __private int *); void __ovld vstore2(uint2, size_t, __private uint *); void __ovld vstore2(long2, size_t, __private long *); void __ovld vstore2(ulong2, size_t, __private ulong *); void __ovld vstore2(float2, size_t, __private float *); void __ovld vstore3(char3, size_t, __private char *); void __ovld vstore3(uchar3, size_t, __private uchar *); void 
__ovld vstore3(short3, size_t, __private short *); void __ovld vstore3(ushort3, size_t, __private ushort *); void __ovld vstore3(int3, size_t, __private int *); void __ovld vstore3(uint3, size_t, __private uint *); void __ovld vstore3(long3, size_t, __private long *); void __ovld vstore3(ulong3, size_t, __private ulong *); void __ovld vstore3(float3, size_t, __private float *); void __ovld vstore4(char4, size_t, __private char *); void __ovld vstore4(uchar4, size_t, __private uchar *); void __ovld vstore4(short4, size_t, __private short *); void __ovld vstore4(ushort4, size_t, __private ushort *); void __ovld vstore4(int4, size_t, __private int *); void __ovld vstore4(uint4, size_t, __private uint *); void __ovld vstore4(long4, size_t, __private long *); void __ovld vstore4(ulong4, size_t, __private ulong *); void __ovld vstore4(float4, size_t, __private float *); void __ovld vstore8(char8, size_t, __private char *); void __ovld vstore8(uchar8, size_t, __private uchar *); void __ovld vstore8(short8, size_t, __private short *); void __ovld vstore8(ushort8, size_t, __private ushort *); void __ovld vstore8(int8, size_t, __private int *); void __ovld vstore8(uint8, size_t, __private uint *); void __ovld vstore8(long8, size_t, __private long *); void __ovld vstore8(ulong8, size_t, __private ulong *); void __ovld vstore8(float8, size_t, __private float *); void __ovld vstore16(char16, size_t, __private char *); void __ovld vstore16(uchar16, size_t, __private uchar *); void __ovld vstore16(short16, size_t, __private short *); void __ovld vstore16(ushort16, size_t, __private ushort *); void __ovld vstore16(int16, size_t, __private int *); void __ovld vstore16(uint16, size_t, __private uint *); void __ovld vstore16(long16, size_t, __private long *); void __ovld vstore16(ulong16, size_t, __private ulong *); void __ovld vstore16(float16, size_t, __private float *); #ifdef cl_khr_fp64 void __ovld vstore2(double2, size_t, __global double *); void __ovld vstore3(double3, size_t, 
__global double *); void __ovld vstore4(double4, size_t, __global double *); void __ovld vstore8(double8, size_t, __global double *); void __ovld vstore16(double16, size_t, __global double *); void __ovld vstore2(double2, size_t, __local double *); void __ovld vstore3(double3, size_t, __local double *); void __ovld vstore4(double4, size_t, __local double *); void __ovld vstore8(double8, size_t, __local double *); void __ovld vstore16(double16, size_t, __local double *); void __ovld vstore2(double2, size_t, __private double *); void __ovld vstore3(double3, size_t, __private double *); void __ovld vstore4(double4, size_t, __private double *); void __ovld vstore8(double8, size_t, __private double *); void __ovld vstore16(double16, size_t, __private double *); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 void __ovld vstore2(half2, size_t, __global half *); void __ovld vstore3(half3, size_t, __global half *); void __ovld vstore4(half4, size_t, __global half *); void __ovld vstore8(half8, size_t, __global half *); void __ovld vstore16(half16, size_t, __global half *); void __ovld vstore2(half2, size_t, __local half *); void __ovld vstore3(half3, size_t, __local half *); void __ovld vstore4(half4, size_t, __local half *); void __ovld vstore8(half8, size_t, __local half *); void __ovld vstore16(half16, size_t, __local half *); void __ovld vstore2(half2, size_t, __private half *); void __ovld vstore3(half3, size_t, __private half *); void __ovld vstore4(half4, size_t, __private half *); void __ovld vstore8(half8, size_t, __private half *); void __ovld vstore16(half16, size_t, __private half *); #endif //cl_khr_fp16 #endif //defined(__opencl_c_named_address_space_builtins) /** * Read sizeof (half) bytes of data from address * (p + offset). The data read is interpreted as a * half value. The half value is converted to a * float value and the float value is returned. * The read address computed as (p + offset) * must be 16-bit aligned. 
*/ float __ovld __purefn vload_half(size_t, const __constant half *); #if defined(__opencl_c_generic_address_space) float __ovld __purefn vload_half(size_t, const half *); #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float __ovld __purefn vload_half(size_t, const __global half *); float __ovld __purefn vload_half(size_t, const __local half *); float __ovld __purefn vload_half(size_t, const __private half *); #endif //defined(__opencl_c_named_address_space_builtins) /** * Read sizeof (halfn) bytes of data from address * (p + (offset * n)). The data read is interpreted * as a halfn value. The halfn value read is * converted to a floatn value and the floatn * value is returned. The read address computed * as (p + (offset * n)) must be 16-bit aligned. */ float2 __ovld __purefn vload_half2(size_t, const __constant half *); float3 __ovld __purefn vload_half3(size_t, const __constant half *); float4 __ovld __purefn vload_half4(size_t, const __constant half *); float8 __ovld __purefn vload_half8(size_t, const __constant half *); float16 __ovld __purefn vload_half16(size_t, const __constant half *); #if defined(__opencl_c_generic_address_space) float2 __ovld __purefn vload_half2(size_t, const half *); float3 __ovld __purefn vload_half3(size_t, const half *); float4 __ovld __purefn vload_half4(size_t, const half *); float8 __ovld __purefn vload_half8(size_t, const half *); float16 __ovld __purefn vload_half16(size_t, const half *); #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float2 __ovld __purefn vload_half2(size_t, const __global half *); float3 __ovld __purefn vload_half3(size_t, const __global half *); float4 __ovld __purefn vload_half4(size_t, const __global half *); float8 __ovld __purefn vload_half8(size_t, const __global half *); float16 __ovld __purefn vload_half16(size_t, const __global half *); float2 __ovld __purefn vload_half2(size_t, const 
__local half *); float3 __ovld __purefn vload_half3(size_t, const __local half *); float4 __ovld __purefn vload_half4(size_t, const __local half *); float8 __ovld __purefn vload_half8(size_t, const __local half *); float16 __ovld __purefn vload_half16(size_t, const __local half *); float2 __ovld __purefn vload_half2(size_t, const __private half *); float3 __ovld __purefn vload_half3(size_t, const __private half *); float4 __ovld __purefn vload_half4(size_t, const __private half *); float8 __ovld __purefn vload_half8(size_t, const __private half *); float16 __ovld __purefn vload_half16(size_t, const __private half *); #endif //defined(__opencl_c_named_address_space_builtins) /** * The float value given by data is first * converted to a half value using the appropriate * rounding mode. The half value is then written * to address computed as (p + offset). The * address computed as (p + offset) must be 16- * bit aligned. * vstore_half use the current rounding mode. * The default current rounding mode is round to * nearest even. 
*/ #if defined(__opencl_c_generic_address_space) void __ovld vstore_half(float, size_t, half *); void __ovld vstore_half_rte(float, size_t, half *); void __ovld vstore_half_rtz(float, size_t, half *); void __ovld vstore_half_rtp(float, size_t, half *); void __ovld vstore_half_rtn(float, size_t, half *); #ifdef cl_khr_fp64 void __ovld vstore_half(double, size_t, half *); void __ovld vstore_half_rte(double, size_t, half *); void __ovld vstore_half_rtz(double, size_t, half *); void __ovld vstore_half_rtp(double, size_t, half *); void __ovld vstore_half_rtn(double, size_t, half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstore_half(float, size_t, __global half *); void __ovld vstore_half_rte(float, size_t, __global half *); void __ovld vstore_half_rtz(float, size_t, __global half *); void __ovld vstore_half_rtp(float, size_t, __global half *); void __ovld vstore_half_rtn(float, size_t, __global half *); void __ovld vstore_half(float, size_t, __local half *); void __ovld vstore_half_rte(float, size_t, __local half *); void __ovld vstore_half_rtz(float, size_t, __local half *); void __ovld vstore_half_rtp(float, size_t, __local half *); void __ovld vstore_half_rtn(float, size_t, __local half *); void __ovld vstore_half(float, size_t, __private half *); void __ovld vstore_half_rte(float, size_t, __private half *); void __ovld vstore_half_rtz(float, size_t, __private half *); void __ovld vstore_half_rtp(float, size_t, __private half *); void __ovld vstore_half_rtn(float, size_t, __private half *); #ifdef cl_khr_fp64 void __ovld vstore_half(double, size_t, __global half *); void __ovld vstore_half_rte(double, size_t, __global half *); void __ovld vstore_half_rtz(double, size_t, __global half *); void __ovld vstore_half_rtp(double, size_t, __global half *); void __ovld vstore_half_rtn(double, size_t, __global half *); void __ovld vstore_half(double, size_t, __local half *); void 
__ovld vstore_half_rte(double, size_t, __local half *); void __ovld vstore_half_rtz(double, size_t, __local half *); void __ovld vstore_half_rtp(double, size_t, __local half *); void __ovld vstore_half_rtn(double, size_t, __local half *); void __ovld vstore_half(double, size_t, __private half *); void __ovld vstore_half_rte(double, size_t, __private half *); void __ovld vstore_half_rtz(double, size_t, __private half *); void __ovld vstore_half_rtp(double, size_t, __private half *); void __ovld vstore_half_rtn(double, size_t, __private half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_named_address_space_builtins) /** * The floatn value given by data is converted to * a halfn value using the appropriate rounding * mode. The halfn value is then written to * address computed as (p + (offset * n)). The * address computed as (p + (offset * n)) must be * 16-bit aligned. * vstore_halfn uses the current rounding mode. * The default current rounding mode is round to * nearest even. */ #if defined(__opencl_c_generic_address_space) void __ovld vstore_half2(float2, size_t, half *); void __ovld vstore_half3(float3, size_t, half *); void __ovld vstore_half4(float4, size_t, half *); void __ovld vstore_half8(float8, size_t, half *); void __ovld vstore_half16(float16, size_t, half *); void __ovld vstore_half2_rte(float2, size_t, half *); void __ovld vstore_half3_rte(float3, size_t, half *); void __ovld vstore_half4_rte(float4, size_t, half *); void __ovld vstore_half8_rte(float8, size_t, half *); void __ovld vstore_half16_rte(float16, size_t, half *); void __ovld vstore_half2_rtz(float2, size_t, half *); void __ovld vstore_half3_rtz(float3, size_t, half *); void __ovld vstore_half4_rtz(float4, size_t, half *); void __ovld vstore_half8_rtz(float8, size_t, half *); void __ovld vstore_half16_rtz(float16, size_t, half *); void __ovld vstore_half2_rtp(float2, size_t, half *); void __ovld vstore_half3_rtp(float3, size_t, half *); void __ovld vstore_half4_rtp(float4, size_t, half 
*); void __ovld vstore_half8_rtp(float8, size_t, half *); void __ovld vstore_half16_rtp(float16, size_t, half *); void __ovld vstore_half2_rtn(float2, size_t, half *); void __ovld vstore_half3_rtn(float3, size_t, half *); void __ovld vstore_half4_rtn(float4, size_t, half *); void __ovld vstore_half8_rtn(float8, size_t, half *); void __ovld vstore_half16_rtn(float16, size_t, half *); #ifdef cl_khr_fp64 void __ovld vstore_half2(double2, size_t, half *); void __ovld vstore_half3(double3, size_t, half *); void __ovld vstore_half4(double4, size_t, half *); void __ovld vstore_half8(double8, size_t, half *); void __ovld vstore_half16(double16, size_t, half *); void __ovld vstore_half2_rte(double2, size_t, half *); void __ovld vstore_half3_rte(double3, size_t, half *); void __ovld vstore_half4_rte(double4, size_t, half *); void __ovld vstore_half8_rte(double8, size_t, half *); void __ovld vstore_half16_rte(double16, size_t, half *); void __ovld vstore_half2_rtz(double2, size_t, half *); void __ovld vstore_half3_rtz(double3, size_t, half *); void __ovld vstore_half4_rtz(double4, size_t, half *); void __ovld vstore_half8_rtz(double8, size_t, half *); void __ovld vstore_half16_rtz(double16, size_t, half *); void __ovld vstore_half2_rtp(double2, size_t, half *); void __ovld vstore_half3_rtp(double3, size_t, half *); void __ovld vstore_half4_rtp(double4, size_t, half *); void __ovld vstore_half8_rtp(double8, size_t, half *); void __ovld vstore_half16_rtp(double16, size_t, half *); void __ovld vstore_half2_rtn(double2, size_t, half *); void __ovld vstore_half3_rtn(double3, size_t, half *); void __ovld vstore_half4_rtn(double4, size_t, half *); void __ovld vstore_half8_rtn(double8, size_t, half *); void __ovld vstore_half16_rtn(double16, size_t, half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstore_half2(float2, size_t, __global half *); void __ovld vstore_half3(float3, size_t, 
__global half *); void __ovld vstore_half4(float4, size_t, __global half *); void __ovld vstore_half8(float8, size_t, __global half *); void __ovld vstore_half16(float16, size_t, __global half *); void __ovld vstore_half2_rte(float2, size_t, __global half *); void __ovld vstore_half3_rte(float3, size_t, __global half *); void __ovld vstore_half4_rte(float4, size_t, __global half *); void __ovld vstore_half8_rte(float8, size_t, __global half *); void __ovld vstore_half16_rte(float16, size_t, __global half *); void __ovld vstore_half2_rtz(float2, size_t, __global half *); void __ovld vstore_half3_rtz(float3, size_t, __global half *); void __ovld vstore_half4_rtz(float4, size_t, __global half *); void __ovld vstore_half8_rtz(float8, size_t, __global half *); void __ovld vstore_half16_rtz(float16, size_t, __global half *); void __ovld vstore_half2_rtp(float2, size_t, __global half *); void __ovld vstore_half3_rtp(float3, size_t, __global half *); void __ovld vstore_half4_rtp(float4, size_t, __global half *); void __ovld vstore_half8_rtp(float8, size_t, __global half *); void __ovld vstore_half16_rtp(float16, size_t, __global half *); void __ovld vstore_half2_rtn(float2, size_t, __global half *); void __ovld vstore_half3_rtn(float3, size_t, __global half *); void __ovld vstore_half4_rtn(float4, size_t, __global half *); void __ovld vstore_half8_rtn(float8, size_t, __global half *); void __ovld vstore_half16_rtn(float16, size_t, __global half *); void __ovld vstore_half2(float2, size_t, __local half *); void __ovld vstore_half3(float3, size_t, __local half *); void __ovld vstore_half4(float4, size_t, __local half *); void __ovld vstore_half8(float8, size_t, __local half *); void __ovld vstore_half16(float16, size_t, __local half *); void __ovld vstore_half2_rte(float2, size_t, __local half *); void __ovld vstore_half3_rte(float3, size_t, __local half *); void __ovld vstore_half4_rte(float4, size_t, __local half *); void __ovld vstore_half8_rte(float8, size_t, __local 
half *); void __ovld vstore_half16_rte(float16, size_t, __local half *); void __ovld vstore_half2_rtz(float2, size_t, __local half *); void __ovld vstore_half3_rtz(float3, size_t, __local half *); void __ovld vstore_half4_rtz(float4, size_t, __local half *); void __ovld vstore_half8_rtz(float8, size_t, __local half *); void __ovld vstore_half16_rtz(float16, size_t, __local half *); void __ovld vstore_half2_rtp(float2, size_t, __local half *); void __ovld vstore_half3_rtp(float3, size_t, __local half *); void __ovld vstore_half4_rtp(float4, size_t, __local half *); void __ovld vstore_half8_rtp(float8, size_t, __local half *); void __ovld vstore_half16_rtp(float16, size_t, __local half *); void __ovld vstore_half2_rtn(float2, size_t, __local half *); void __ovld vstore_half3_rtn(float3, size_t, __local half *); void __ovld vstore_half4_rtn(float4, size_t, __local half *); void __ovld vstore_half8_rtn(float8, size_t, __local half *); void __ovld vstore_half16_rtn(float16, size_t, __local half *); void __ovld vstore_half2(float2, size_t, __private half *); void __ovld vstore_half3(float3, size_t, __private half *); void __ovld vstore_half4(float4, size_t, __private half *); void __ovld vstore_half8(float8, size_t, __private half *); void __ovld vstore_half16(float16, size_t, __private half *); void __ovld vstore_half2_rte(float2, size_t, __private half *); void __ovld vstore_half3_rte(float3, size_t, __private half *); void __ovld vstore_half4_rte(float4, size_t, __private half *); void __ovld vstore_half8_rte(float8, size_t, __private half *); void __ovld vstore_half16_rte(float16, size_t, __private half *); void __ovld vstore_half2_rtz(float2, size_t, __private half *); void __ovld vstore_half3_rtz(float3, size_t, __private half *); void __ovld vstore_half4_rtz(float4, size_t, __private half *); void __ovld vstore_half8_rtz(float8, size_t, __private half *); void __ovld vstore_half16_rtz(float16, size_t, __private half *); void __ovld vstore_half2_rtp(float2, size_t, 
__private half *); void __ovld vstore_half3_rtp(float3, size_t, __private half *); void __ovld vstore_half4_rtp(float4, size_t, __private half *); void __ovld vstore_half8_rtp(float8, size_t, __private half *); void __ovld vstore_half16_rtp(float16, size_t, __private half *); void __ovld vstore_half2_rtn(float2, size_t, __private half *); void __ovld vstore_half3_rtn(float3, size_t, __private half *); void __ovld vstore_half4_rtn(float4, size_t, __private half *); void __ovld vstore_half8_rtn(float8, size_t, __private half *); void __ovld vstore_half16_rtn(float16, size_t, __private half *); #ifdef cl_khr_fp64 void __ovld vstore_half2(double2, size_t, __global half *); void __ovld vstore_half3(double3, size_t, __global half *); void __ovld vstore_half4(double4, size_t, __global half *); void __ovld vstore_half8(double8, size_t, __global half *); void __ovld vstore_half16(double16, size_t, __global half *); void __ovld vstore_half2_rte(double2, size_t, __global half *); void __ovld vstore_half3_rte(double3, size_t, __global half *); void __ovld vstore_half4_rte(double4, size_t, __global half *); void __ovld vstore_half8_rte(double8, size_t, __global half *); void __ovld vstore_half16_rte(double16, size_t, __global half *); void __ovld vstore_half2_rtz(double2, size_t, __global half *); void __ovld vstore_half3_rtz(double3, size_t, __global half *); void __ovld vstore_half4_rtz(double4, size_t, __global half *); void __ovld vstore_half8_rtz(double8, size_t, __global half *); void __ovld vstore_half16_rtz(double16, size_t, __global half *); void __ovld vstore_half2_rtp(double2, size_t, __global half *); void __ovld vstore_half3_rtp(double3, size_t, __global half *); void __ovld vstore_half4_rtp(double4, size_t, __global half *); void __ovld vstore_half8_rtp(double8, size_t, __global half *); void __ovld vstore_half16_rtp(double16, size_t, __global half *); void __ovld vstore_half2_rtn(double2, size_t, __global half *); void __ovld vstore_half3_rtn(double3, size_t, 
__global half *); void __ovld vstore_half4_rtn(double4, size_t, __global half *); void __ovld vstore_half8_rtn(double8, size_t, __global half *); void __ovld vstore_half16_rtn(double16, size_t, __global half *); void __ovld vstore_half2(double2, size_t, __local half *); void __ovld vstore_half3(double3, size_t, __local half *); void __ovld vstore_half4(double4, size_t, __local half *); void __ovld vstore_half8(double8, size_t, __local half *); void __ovld vstore_half16(double16, size_t, __local half *); void __ovld vstore_half2_rte(double2, size_t, __local half *); void __ovld vstore_half3_rte(double3, size_t, __local half *); void __ovld vstore_half4_rte(double4, size_t, __local half *); void __ovld vstore_half8_rte(double8, size_t, __local half *); void __ovld vstore_half16_rte(double16, size_t, __local half *); void __ovld vstore_half2_rtz(double2, size_t, __local half *); void __ovld vstore_half3_rtz(double3, size_t, __local half *); void __ovld vstore_half4_rtz(double4, size_t, __local half *); void __ovld vstore_half8_rtz(double8, size_t, __local half *); void __ovld vstore_half16_rtz(double16, size_t, __local half *); void __ovld vstore_half2_rtp(double2, size_t, __local half *); void __ovld vstore_half3_rtp(double3, size_t, __local half *); void __ovld vstore_half4_rtp(double4, size_t, __local half *); void __ovld vstore_half8_rtp(double8, size_t, __local half *); void __ovld vstore_half16_rtp(double16, size_t, __local half *); void __ovld vstore_half2_rtn(double2, size_t, __local half *); void __ovld vstore_half3_rtn(double3, size_t, __local half *); void __ovld vstore_half4_rtn(double4, size_t, __local half *); void __ovld vstore_half8_rtn(double8, size_t, __local half *); void __ovld vstore_half16_rtn(double16, size_t, __local half *); void __ovld vstore_half2(double2, size_t, __private half *); void __ovld vstore_half3(double3, size_t, __private half *); void __ovld vstore_half4(double4, size_t, __private half *); void __ovld vstore_half8(double8, 
size_t, __private half *); void __ovld vstore_half16(double16, size_t, __private half *); void __ovld vstore_half2_rte(double2, size_t, __private half *); void __ovld vstore_half3_rte(double3, size_t, __private half *); void __ovld vstore_half4_rte(double4, size_t, __private half *); void __ovld vstore_half8_rte(double8, size_t, __private half *); void __ovld vstore_half16_rte(double16, size_t, __private half *); void __ovld vstore_half2_rtz(double2, size_t, __private half *); void __ovld vstore_half3_rtz(double3, size_t, __private half *); void __ovld vstore_half4_rtz(double4, size_t, __private half *); void __ovld vstore_half8_rtz(double8, size_t, __private half *); void __ovld vstore_half16_rtz(double16, size_t, __private half *); void __ovld vstore_half2_rtp(double2, size_t, __private half *); void __ovld vstore_half3_rtp(double3, size_t, __private half *); void __ovld vstore_half4_rtp(double4, size_t, __private half *); void __ovld vstore_half8_rtp(double8, size_t, __private half *); void __ovld vstore_half16_rtp(double16, size_t, __private half *); void __ovld vstore_half2_rtn(double2, size_t, __private half *); void __ovld vstore_half3_rtn(double3, size_t, __private half *); void __ovld vstore_half4_rtn(double4, size_t, __private half *); void __ovld vstore_half8_rtn(double8, size_t, __private half *); void __ovld vstore_half16_rtn(double16, size_t, __private half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_named_address_space_builtins) /** * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) * bytes of data from address (p + (offset * n)). * The data read is interpreted as a halfn value. * The halfn value read is converted to a floatn * value and the floatn value is returned. * The address computed as (p + (offset * n)) * must be aligned to sizeof (halfn) bytes. * For n = 3, vloada_half3 reads a half3 from * address (p + (offset * 4)) and returns a float3. * The address computed as (p + (offset * 4)) * must be aligned to sizeof (half) * 4 bytes. 
*/ float2 __ovld __purefn vloada_half2(size_t, const __constant half *); float3 __ovld __purefn vloada_half3(size_t, const __constant half *); float4 __ovld __purefn vloada_half4(size_t, const __constant half *); float8 __ovld __purefn vloada_half8(size_t, const __constant half *); float16 __ovld __purefn vloada_half16(size_t, const __constant half *); #if defined(__opencl_c_generic_address_space) float2 __ovld __purefn vloada_half2(size_t, const half *); float3 __ovld __purefn vloada_half3(size_t, const half *); float4 __ovld __purefn vloada_half4(size_t, const half *); float8 __ovld __purefn vloada_half8(size_t, const half *); float16 __ovld __purefn vloada_half16(size_t, const half *); #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) float2 __ovld __purefn vloada_half2(size_t, const __global half *); float3 __ovld __purefn vloada_half3(size_t, const __global half *); float4 __ovld __purefn vloada_half4(size_t, const __global half *); float8 __ovld __purefn vloada_half8(size_t, const __global half *); float16 __ovld __purefn vloada_half16(size_t, const __global half *); float2 __ovld __purefn vloada_half2(size_t, const __local half *); float3 __ovld __purefn vloada_half3(size_t, const __local half *); float4 __ovld __purefn vloada_half4(size_t, const __local half *); float8 __ovld __purefn vloada_half8(size_t, const __local half *); float16 __ovld __purefn vloada_half16(size_t, const __local half *); float2 __ovld __purefn vloada_half2(size_t, const __private half *); float3 __ovld __purefn vloada_half3(size_t, const __private half *); float4 __ovld __purefn vloada_half4(size_t, const __private half *); float8 __ovld __purefn vloada_half8(size_t, const __private half *); float16 __ovld __purefn vloada_half16(size_t, const __private half *); #endif //defined(__opencl_c_named_address_space_builtins) /** * The floatn value given by data is converted to * a halfn value using the appropriate rounding * mode. 
* For n = 1, 2, 4, 8 and 16, the halfn value is * written to the address computed as (p + (offset * * n)). The address computed as (p + (offset * * n)) must be aligned to sizeof (halfn) bytes. * For n = 3, the half3 value is written to the * address computed as (p + (offset * 4)). The * address computed as (p + (offset * 4)) must be * aligned to sizeof (half) * 4 bytes. * vstorea_halfn uses the current rounding * mode. The default current rounding mode is * round to nearest even. */ #if defined(__opencl_c_generic_address_space) void __ovld vstorea_half2(float2, size_t, half *); void __ovld vstorea_half3(float3, size_t, half *); void __ovld vstorea_half4(float4, size_t, half *); void __ovld vstorea_half8(float8, size_t, half *); void __ovld vstorea_half16(float16, size_t, half *); void __ovld vstorea_half2_rte(float2, size_t, half *); void __ovld vstorea_half3_rte(float3, size_t, half *); void __ovld vstorea_half4_rte(float4, size_t, half *); void __ovld vstorea_half8_rte(float8, size_t, half *); void __ovld vstorea_half16_rte(float16, size_t, half *); void __ovld vstorea_half2_rtz(float2, size_t, half *); void __ovld vstorea_half3_rtz(float3, size_t, half *); void __ovld vstorea_half4_rtz(float4, size_t, half *); void __ovld vstorea_half8_rtz(float8, size_t, half *); void __ovld vstorea_half16_rtz(float16, size_t, half *); void __ovld vstorea_half2_rtp(float2, size_t, half *); void __ovld vstorea_half3_rtp(float3, size_t, half *); void __ovld vstorea_half4_rtp(float4, size_t, half *); void __ovld vstorea_half8_rtp(float8, size_t, half *); void __ovld vstorea_half16_rtp(float16, size_t, half *); void __ovld vstorea_half2_rtn(float2, size_t, half *); void __ovld vstorea_half3_rtn(float3, size_t, half *); void __ovld vstorea_half4_rtn(float4, size_t, half *); void __ovld vstorea_half8_rtn(float8, size_t, half *); void __ovld vstorea_half16_rtn(float16, size_t, half *); #ifdef cl_khr_fp64 void __ovld vstorea_half2(double2, size_t, half *); void __ovld 
vstorea_half3(double3, size_t, half *); void __ovld vstorea_half4(double4, size_t, half *); void __ovld vstorea_half8(double8, size_t, half *); void __ovld vstorea_half16(double16, size_t, half *); void __ovld vstorea_half2_rte(double2, size_t, half *); void __ovld vstorea_half3_rte(double3, size_t, half *); void __ovld vstorea_half4_rte(double4, size_t, half *); void __ovld vstorea_half8_rte(double8, size_t, half *); void __ovld vstorea_half16_rte(double16, size_t, half *); void __ovld vstorea_half2_rtz(double2, size_t, half *); void __ovld vstorea_half3_rtz(double3, size_t, half *); void __ovld vstorea_half4_rtz(double4, size_t, half *); void __ovld vstorea_half8_rtz(double8, size_t, half *); void __ovld vstorea_half16_rtz(double16, size_t, half *); void __ovld vstorea_half2_rtp(double2, size_t, half *); void __ovld vstorea_half3_rtp(double3, size_t, half *); void __ovld vstorea_half4_rtp(double4, size_t, half *); void __ovld vstorea_half8_rtp(double8, size_t, half *); void __ovld vstorea_half16_rtp(double16, size_t, half *); void __ovld vstorea_half2_rtn(double2, size_t, half *); void __ovld vstorea_half3_rtn(double3, size_t, half *); void __ovld vstorea_half4_rtn(double4, size_t, half *); void __ovld vstorea_half8_rtn(double8, size_t, half *); void __ovld vstorea_half16_rtn(double16, size_t, half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_generic_address_space) #if defined(__opencl_c_named_address_space_builtins) void __ovld vstorea_half2(float2, size_t, __global half *); void __ovld vstorea_half3(float3, size_t, __global half *); void __ovld vstorea_half4(float4, size_t, __global half *); void __ovld vstorea_half8(float8, size_t, __global half *); void __ovld vstorea_half16(float16, size_t, __global half *); void __ovld vstorea_half2_rte(float2, size_t, __global half *); void __ovld vstorea_half3_rte(float3, size_t, __global half *); void __ovld vstorea_half4_rte(float4, size_t, __global half *); void __ovld vstorea_half8_rte(float8, size_t, __global 
half *); void __ovld vstorea_half16_rte(float16, size_t, __global half *); void __ovld vstorea_half2_rtz(float2, size_t, __global half *); void __ovld vstorea_half3_rtz(float3, size_t, __global half *); void __ovld vstorea_half4_rtz(float4, size_t, __global half *); void __ovld vstorea_half8_rtz(float8, size_t, __global half *); void __ovld vstorea_half16_rtz(float16, size_t, __global half *); void __ovld vstorea_half2_rtp(float2, size_t, __global half *); void __ovld vstorea_half3_rtp(float3, size_t, __global half *); void __ovld vstorea_half4_rtp(float4, size_t, __global half *); void __ovld vstorea_half8_rtp(float8, size_t, __global half *); void __ovld vstorea_half16_rtp(float16, size_t, __global half *); void __ovld vstorea_half2_rtn(float2, size_t, __global half *); void __ovld vstorea_half3_rtn(float3, size_t, __global half *); void __ovld vstorea_half4_rtn(float4, size_t, __global half *); void __ovld vstorea_half8_rtn(float8, size_t, __global half *); void __ovld vstorea_half16_rtn(float16, size_t, __global half *); void __ovld vstorea_half2(float2, size_t, __local half *); void __ovld vstorea_half3(float3, size_t, __local half *); void __ovld vstorea_half4(float4, size_t, __local half *); void __ovld vstorea_half8(float8, size_t, __local half *); void __ovld vstorea_half16(float16, size_t, __local half *); void __ovld vstorea_half2_rte(float2, size_t, __local half *); void __ovld vstorea_half3_rte(float3, size_t, __local half *); void __ovld vstorea_half4_rte(float4, size_t, __local half *); void __ovld vstorea_half8_rte(float8, size_t, __local half *); void __ovld vstorea_half16_rte(float16, size_t, __local half *); void __ovld vstorea_half2_rtz(float2, size_t, __local half *); void __ovld vstorea_half3_rtz(float3, size_t, __local half *); void __ovld vstorea_half4_rtz(float4, size_t, __local half *); void __ovld vstorea_half8_rtz(float8, size_t, __local half *); void __ovld vstorea_half16_rtz(float16, size_t, __local half *); void __ovld 
vstorea_half2_rtp(float2, size_t, __local half *); void __ovld vstorea_half3_rtp(float3, size_t, __local half *); void __ovld vstorea_half4_rtp(float4, size_t, __local half *); void __ovld vstorea_half8_rtp(float8, size_t, __local half *); void __ovld vstorea_half16_rtp(float16, size_t, __local half *); void __ovld vstorea_half2_rtn(float2, size_t, __local half *); void __ovld vstorea_half3_rtn(float3, size_t, __local half *); void __ovld vstorea_half4_rtn(float4, size_t, __local half *); void __ovld vstorea_half8_rtn(float8, size_t, __local half *); void __ovld vstorea_half16_rtn(float16, size_t, __local half *); void __ovld vstorea_half2(float2, size_t, __private half *); void __ovld vstorea_half3(float3, size_t, __private half *); void __ovld vstorea_half4(float4, size_t, __private half *); void __ovld vstorea_half8(float8, size_t, __private half *); void __ovld vstorea_half16(float16, size_t, __private half *); void __ovld vstorea_half2_rte(float2, size_t, __private half *); void __ovld vstorea_half3_rte(float3, size_t, __private half *); void __ovld vstorea_half4_rte(float4, size_t, __private half *); void __ovld vstorea_half8_rte(float8, size_t, __private half *); void __ovld vstorea_half16_rte(float16, size_t, __private half *); void __ovld vstorea_half2_rtz(float2, size_t, __private half *); void __ovld vstorea_half3_rtz(float3, size_t, __private half *); void __ovld vstorea_half4_rtz(float4, size_t, __private half *); void __ovld vstorea_half8_rtz(float8, size_t, __private half *); void __ovld vstorea_half16_rtz(float16, size_t, __private half *); void __ovld vstorea_half2_rtp(float2, size_t, __private half *); void __ovld vstorea_half3_rtp(float3, size_t, __private half *); void __ovld vstorea_half4_rtp(float4, size_t, __private half *); void __ovld vstorea_half8_rtp(float8, size_t, __private half *); void __ovld vstorea_half16_rtp(float16, size_t, __private half *); void __ovld vstorea_half2_rtn(float2, size_t, __private half *); void __ovld 
vstorea_half3_rtn(float3, size_t, __private half *); void __ovld vstorea_half4_rtn(float4, size_t, __private half *); void __ovld vstorea_half8_rtn(float8, size_t, __private half *); void __ovld vstorea_half16_rtn(float16, size_t, __private half *); #ifdef cl_khr_fp64 void __ovld vstorea_half2(double2, size_t, __global half *); void __ovld vstorea_half3(double3, size_t, __global half *); void __ovld vstorea_half4(double4, size_t, __global half *); void __ovld vstorea_half8(double8, size_t, __global half *); void __ovld vstorea_half16(double16, size_t, __global half *); void __ovld vstorea_half2_rte(double2, size_t, __global half *); void __ovld vstorea_half3_rte(double3, size_t, __global half *); void __ovld vstorea_half4_rte(double4, size_t, __global half *); void __ovld vstorea_half8_rte(double8, size_t, __global half *); void __ovld vstorea_half16_rte(double16, size_t, __global half *); void __ovld vstorea_half2_rtz(double2, size_t, __global half *); void __ovld vstorea_half3_rtz(double3, size_t, __global half *); void __ovld vstorea_half4_rtz(double4, size_t, __global half *); void __ovld vstorea_half8_rtz(double8, size_t, __global half *); void __ovld vstorea_half16_rtz(double16, size_t, __global half *); void __ovld vstorea_half2_rtp(double2, size_t, __global half *); void __ovld vstorea_half3_rtp(double3, size_t, __global half *); void __ovld vstorea_half4_rtp(double4, size_t, __global half *); void __ovld vstorea_half8_rtp(double8, size_t, __global half *); void __ovld vstorea_half16_rtp(double16, size_t, __global half *); void __ovld vstorea_half2_rtn(double2, size_t, __global half *); void __ovld vstorea_half3_rtn(double3, size_t, __global half *); void __ovld vstorea_half4_rtn(double4, size_t, __global half *); void __ovld vstorea_half8_rtn(double8, size_t, __global half *); void __ovld vstorea_half16_rtn(double16, size_t, __global half *); void __ovld vstorea_half2(double2, size_t, __local half *); void __ovld vstorea_half3(double3, size_t, __local half 
*); void __ovld vstorea_half4(double4, size_t, __local half *); void __ovld vstorea_half8(double8, size_t, __local half *); void __ovld vstorea_half16(double16, size_t, __local half *); void __ovld vstorea_half2_rte(double2, size_t, __local half *); void __ovld vstorea_half3_rte(double3, size_t, __local half *); void __ovld vstorea_half4_rte(double4, size_t, __local half *); void __ovld vstorea_half8_rte(double8, size_t, __local half *); void __ovld vstorea_half16_rte(double16, size_t, __local half *); void __ovld vstorea_half2_rtz(double2, size_t, __local half *); void __ovld vstorea_half3_rtz(double3, size_t, __local half *); void __ovld vstorea_half4_rtz(double4, size_t, __local half *); void __ovld vstorea_half8_rtz(double8, size_t, __local half *); void __ovld vstorea_half16_rtz(double16, size_t, __local half *); void __ovld vstorea_half2_rtp(double2, size_t, __local half *); void __ovld vstorea_half3_rtp(double3, size_t, __local half *); void __ovld vstorea_half4_rtp(double4, size_t, __local half *); void __ovld vstorea_half8_rtp(double8, size_t, __local half *); void __ovld vstorea_half16_rtp(double16, size_t, __local half *); void __ovld vstorea_half2_rtn(double2, size_t, __local half *); void __ovld vstorea_half3_rtn(double3, size_t, __local half *); void __ovld vstorea_half4_rtn(double4, size_t, __local half *); void __ovld vstorea_half8_rtn(double8, size_t, __local half *); void __ovld vstorea_half16_rtn(double16, size_t, __local half *); void __ovld vstorea_half2(double2, size_t, __private half *); void __ovld vstorea_half3(double3, size_t, __private half *); void __ovld vstorea_half4(double4, size_t, __private half *); void __ovld vstorea_half8(double8, size_t, __private half *); void __ovld vstorea_half16(double16, size_t, __private half *); void __ovld vstorea_half2_rte(double2, size_t, __private half *); void __ovld vstorea_half3_rte(double3, size_t, __private half *); void __ovld vstorea_half4_rte(double4, size_t, __private half *); void __ovld 
vstorea_half8_rte(double8, size_t, __private half *); void __ovld vstorea_half16_rte(double16, size_t, __private half *); void __ovld vstorea_half2_rtz(double2, size_t, __private half *); void __ovld vstorea_half3_rtz(double3, size_t, __private half *); void __ovld vstorea_half4_rtz(double4, size_t, __private half *); void __ovld vstorea_half8_rtz(double8, size_t, __private half *); void __ovld vstorea_half16_rtz(double16, size_t, __private half *); void __ovld vstorea_half2_rtp(double2, size_t, __private half *); void __ovld vstorea_half3_rtp(double3, size_t, __private half *); void __ovld vstorea_half4_rtp(double4, size_t, __private half *); void __ovld vstorea_half8_rtp(double8, size_t, __private half *); void __ovld vstorea_half16_rtp(double16, size_t, __private half *); void __ovld vstorea_half2_rtn(double2, size_t, __private half *); void __ovld vstorea_half3_rtn(double3, size_t, __private half *); void __ovld vstorea_half4_rtn(double4, size_t, __private half *); void __ovld vstorea_half8_rtn(double8, size_t, __private half *); void __ovld vstorea_half16_rtn(double16, size_t, __private half *); #endif //cl_khr_fp64 #endif //defined(__opencl_c_named_address_space_builtins) // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions /** * All work-items in a work-group executing the kernel * on a processor must execute this function before any * are allowed to continue execution beyond the barrier. * This function must be encountered by all work-items in * a work-group executing the kernel. * If barrier is inside a conditional statement, then all * work-items must enter the conditional if any work-item * enters the conditional statement and executes the * barrier. * If barrer is inside a loop, all work-items must execute * the barrier for each iteration of the loop before any are * allowed to continue execution beyond the barrier. 
* The barrier function also queues a memory fence * (reads and writes) to ensure correct ordering of * memory operations to local or global memory. * The flags argument specifies the memory address space * and can be set to a combination of the following literal * values. * CLK_LOCAL_MEM_FENCE - The barrier function * will either flush any variables stored in local memory * or queue a memory fence to ensure correct ordering of * memory operations to local memory. * CLK_GLOBAL_MEM_FENCE - The barrier function * will queue a memory fence to ensure correct ordering * of memory operations to global memory. This can be * useful when work-items, for example, write to buffer or * image objects and then want to read the updated data. */ void __ovld __conv barrier(cl_mem_fence_flags); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv work_group_barrier(cl_mem_fence_flags, memory_scope); void __ovld __conv work_group_barrier(cl_mem_fence_flags); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions /** * Orders loads and stores of a work-item * executing a kernel. This means that loads * and stores preceding the mem_fence will * be committed to memory before any loads * and stores following the mem_fence. * The flags argument specifies the memory * address space and can be set to a * combination of the following literal * values: * CLK_LOCAL_MEM_FENCE * CLK_GLOBAL_MEM_FENCE. */ void __ovld mem_fence(cl_mem_fence_flags); /** * Read memory barrier that orders only * loads. * The flags argument specifies the memory * address space and can be set to a * combination of the following literal * values: * CLK_LOCAL_MEM_FENCE * CLK_GLOBAL_MEM_FENCE. */ void __ovld read_mem_fence(cl_mem_fence_flags); /** * Write memory barrier that orders only * stores. 
* The flags argument specifies the memory * address space and can be set to a * combination of the following literal * values: * CLK_LOCAL_MEM_FENCE * CLK_GLOBAL_MEM_FENCE. */ void __ovld write_mem_fence(cl_mem_fence_flags); // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions #if defined(__opencl_c_generic_address_space) cl_mem_fence_flags __ovld get_fence(const void *ptr); cl_mem_fence_flags __ovld get_fence(void *ptr); /** * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions * and checked in Sema since they should be declared as * addr gentype* to_addr (gentype*); * where gentype is builtin type or user defined type. */ #endif //defined(__opencl_c_generic_address_space) // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch /** * event_t async_work_group_copy ( * __global gentype *dst, * const __local gentype *src, * size_t num_elements, * event_t event) * Perform an async copy of num_elements * gentype elements from src to dst. The async * copy is performed by all work-items in a workgroup * and this built-in function must therefore * be encountered by all work-items in a workgroup * executing the kernel with the same * argument values; otherwise the results are * undefined. * Returns an event object that can be used by * wait_group_events to wait for the async copy * to finish. The event argument can also be used * to associate the async_work_group_copy with * a previous async copy allowing an event to be * shared by multiple async copies; otherwise event * should be zero. * If event argument is non-zero, the event object * supplied in event argument will be returned. * This function does not perform any implicit * synchronization of source data such as using a * barrier before performing the copy. 
*/ event_t __ovld async_work_group_copy(__local char *, const __global char *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar *, const __global uchar *, size_t, event_t); event_t __ovld async_work_group_copy(__local short *, const __global short *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort *, const __global ushort *, size_t, event_t); event_t __ovld async_work_group_copy(__local int *, const __global int *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint *, const __global uint *, size_t, event_t); event_t __ovld async_work_group_copy(__local long *, const __global long *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong *, const __global ulong *, size_t, event_t); event_t __ovld async_work_group_copy(__local float *, const __global float *, size_t, event_t); event_t __ovld async_work_group_copy(__local char2 *, const __global char2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar2 *, const __global uchar2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short2 *, const __global short2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort2 *, const __global ushort2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int2 *, const __global int2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint2 *, const __global uint2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long2 *, const __global long2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong2 *, const __global ulong2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float2 *, const __global float2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char3 *, const __global char3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar3 *, const __global uchar3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short3 *, const __global 
short3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort3 *, const __global ushort3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int3 *, const __global int3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint3 *, const __global uint3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long3 *, const __global long3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong3 *, const __global ulong3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float3 *, const __global float3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char4 *, const __global char4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar4 *, const __global uchar4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short4 *, const __global short4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort4 *, const __global ushort4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int4 *, const __global int4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint4 *, const __global uint4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long4 *, const __global long4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong4 *, const __global ulong4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float4 *, const __global float4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char8 *, const __global char8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar8 *, const __global uchar8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short8 *, const __global short8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort8 *, const __global ushort8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int8 *, const __global int8 *, size_t, event_t); event_t __ovld 
async_work_group_copy(__local uint8 *, const __global uint8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long8 *, const __global long8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong8 *, const __global ulong8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float8 *, const __global float8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local char16 *, const __global char16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uchar16 *, const __global uchar16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local short16 *, const __global short16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ushort16 *, const __global ushort16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local int16 *, const __global int16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local uint16 *, const __global uint16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local long16 *, const __global long16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local ulong16 *, const __global ulong16 *, size_t, event_t); event_t __ovld async_work_group_copy(__local float16 *, const __global float16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char *, const __local char *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar *, const __local uchar *, size_t, event_t); event_t __ovld async_work_group_copy(__global short *, const __local short *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort *, const __local ushort *, size_t, event_t); event_t __ovld async_work_group_copy(__global int *, const __local int *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint *, const __local uint *, size_t, event_t); event_t __ovld async_work_group_copy(__global long *, const __local long *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong *, const __local 
ulong *, size_t, event_t); event_t __ovld async_work_group_copy(__global float *, const __local float *, size_t, event_t); event_t __ovld async_work_group_copy(__global char2 *, const __local char2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar2 *, const __local uchar2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short2 *, const __local short2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort2 *, const __local ushort2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int2 *, const __local int2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint2 *, const __local uint2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long2 *, const __local long2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong2 *, const __local ulong2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float2 *, const __local float2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char3 *, const __local char3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar3 *, const __local uchar3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short3 *, const __local short3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort3 *, const __local ushort3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int3 *, const __local int3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint3 *, const __local uint3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long3 *, const __local long3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong3 *, const __local ulong3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float3 *, const __local float3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char4 *, const __local char4 *, size_t, event_t); event_t __ovld 
async_work_group_copy(__global uchar4 *, const __local uchar4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short4 *, const __local short4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort4 *, const __local ushort4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int4 *, const __local int4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint4 *, const __local uint4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long4 *, const __local long4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong4 *, const __local ulong4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float4 *, const __local float4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char8 *, const __local char8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar8 *, const __local uchar8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short8 *, const __local short8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort8 *, const __local ushort8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int8 *, const __local int8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint8 *, const __local uint8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long8 *, const __local long8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong8 *, const __local ulong8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float8 *, const __local float8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global char16 *, const __local char16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uchar16 *, const __local uchar16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global short16 *, const __local short16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ushort16 *, const 
__local ushort16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global int16 *, const __local int16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global uint16 *, const __local uint16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global long16 *, const __local long16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global ulong16 *, const __local ulong16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global float16 *, const __local float16 *, size_t, event_t); #ifdef cl_khr_fp64 event_t __ovld async_work_group_copy(__local double *, const __global double *, size_t, event_t); event_t __ovld async_work_group_copy(__local double2 *, const __global double2 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double3 *, const __global double3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double4 *, const __global double4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double8 *, const __global double8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local double16 *, const __global double16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double *, const __local double *, size_t, event_t); event_t __ovld async_work_group_copy(__global double2 *, const __local double2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double3 *, const __local double3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double4 *, const __local double4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double8 *, const __local double8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global double16 *, const __local double16 *, size_t, event_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 event_t __ovld async_work_group_copy(__local half *, const __global half *, size_t, event_t); event_t __ovld async_work_group_copy(__local half2 *, const __global half2 *, size_t, event_t); event_t __ovld 
async_work_group_copy(__local half3 *, const __global half3 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half4 *, const __global half4 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half8 *, const __global half8 *, size_t, event_t); event_t __ovld async_work_group_copy(__local half16 *, const __global half16 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half *, const __local half *, size_t, event_t); event_t __ovld async_work_group_copy(__global half2 *, const __local half2 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half3 *, const __local half3 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half4 *, const __local half4 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half8 *, const __local half8 *, size_t, event_t); event_t __ovld async_work_group_copy(__global half16 *, const __local half16 *, size_t, event_t); #endif //cl_khr_fp16 /** * Perform an async gather of num_elements * gentype elements from src to dst. The * src_stride is the stride in elements for each * gentype element read from src. The dst_stride * is the stride in elements for each gentype * element written to dst. The async gather is * performed by all work-items in a work-group. * This built-in function must therefore be * encountered by all work-items in a work-group * executing the kernel with the same argument * values; otherwise the results are undefined. * Returns an event object that can be used by * wait_group_events to wait for the async copy * to finish. The event argument can also be used * to associate the * async_work_group_strided_copy with a * previous async copy allowing an event to be * shared by multiple async copies; otherwise event * should be zero. * If event argument is non-zero, the event object * supplied in event argument will be returned. 
* This function does not perform any implicit * synchronization of source data such as using a * barrier before performing the copy. */ event_t __ovld async_work_group_strided_copy(__local char *, const __global char *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar *, const __global uchar *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short *, const __global short *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort *, const __global ushort *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int *, const __global int *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint *, const __global uint *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long *, const __global long *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong *, const __global ulong *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float *, const __global float *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char2 *, const __global char2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar2 *, const __global uchar2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short2 *, const __global short2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort2 *, const __global ushort2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int2 *, const __global int2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint2 *, const __global uint2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long2 *, const __global long2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong2 *, const 
__global ulong2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float2 *, const __global float2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char3 *, const __global char3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar3 *, const __global uchar3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short3 *, const __global short3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort3 *, const __global ushort3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int3 *, const __global int3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint3 *, const __global uint3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long3 *, const __global long3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong3 *, const __global ulong3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float3 *, const __global float3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char4 *, const __global char4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar4 *, const __global uchar4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short4 *, const __global short4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort4 *, const __global ushort4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int4 *, const __global int4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint4 *, const __global uint4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long4 *, const __global long4 *, size_t, size_t, event_t); event_t __ovld 
async_work_group_strided_copy(__local ulong4 *, const __global ulong4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float4 *, const __global float4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char8 *, const __global char8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar8 *, const __global uchar8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short8 *, const __global short8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort8 *, const __global ushort8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int8 *, const __global int8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint8 *, const __global uint8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long8 *, const __global long8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong8 *, const __global ulong8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float8 *, const __global float8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local char16 *, const __global char16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uchar16 *, const __global uchar16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local short16 *, const __global short16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ushort16 *, const __global ushort16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local int16 *, const __global int16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local uint16 *, const __global uint16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local long16 *, const __global 
long16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local ulong16 *, const __global ulong16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local float16 *, const __global float16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char *, const __local char *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar *, const __local uchar *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short *, const __local short *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort *, const __local ushort *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int *, const __local int *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint *, const __local uint *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long *, const __local long *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong *, const __local ulong *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float *, const __local float *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char2 *, const __local char2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar2 *, const __local uchar2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short2 *, const __local short2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort2 *, const __local ushort2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int2 *, const __local int2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint2 *, const __local uint2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global 
long2 *, const __local long2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong2 *, const __local ulong2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float2 *, const __local float2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char3 *, const __local char3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar3 *, const __local uchar3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short3 *, const __local short3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort3 *, const __local ushort3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int3 *, const __local int3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint3 *, const __local uint3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long3 *, const __local long3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong3 *, const __local ulong3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float3 *, const __local float3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char4 *, const __local char4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar4 *, const __local uchar4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short4 *, const __local short4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort4 *, const __local ushort4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int4 *, const __local int4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint4 *, const __local uint4 *, size_t, size_t, event_t); event_t __ovld 
async_work_group_strided_copy(__global long4 *, const __local long4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong4 *, const __local ulong4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float4 *, const __local float4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char8 *, const __local char8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar8 *, const __local uchar8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short8 *, const __local short8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort8 *, const __local ushort8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int8 *, const __local int8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint8 *, const __local uint8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long8 *, const __local long8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong8 *, const __local ulong8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float8 *, const __local float8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global char16 *, const __local char16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uchar16 *, const __local uchar16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global short16 *, const __local short16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ushort16 *, const __local ushort16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global int16 *, const __local int16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global uint16 *, const __local 
uint16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global long16 *, const __local long16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global ulong16 *, const __local ulong16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global float16 *, const __local float16 *, size_t, size_t, event_t); #ifdef cl_khr_fp64 event_t __ovld async_work_group_strided_copy(__local double *, const __global double *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double2 *, const __global double2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double3 *, const __global double3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double4 *, const __global double4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double8 *, const __global double8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local double16 *, const __global double16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double *, const __local double *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double2 *, const __local double2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double3 *, const __local double3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double4 *, const __local double4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double8 *, const __local double8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global double16 *, const __local double16 *, size_t, size_t, event_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 event_t __ovld async_work_group_strided_copy(__local half *, const __global half *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local 
half2 *, const __global half2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half3 *, const __global half3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half4 *, const __global half4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half8 *, const __global half8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__local half16 *, const __global half16 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half *, const __local half *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half2 *, const __local half2 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half3 *, const __local half3 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half4 *, const __local half4 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half8 *, const __local half8 *, size_t, size_t, event_t); event_t __ovld async_work_group_strided_copy(__global half16 *, const __local half16 *, size_t, size_t, event_t); #endif //cl_khr_fp16 /** * Wait for events that identify the * async_work_group_copy operations to * complete. The event objects specified in * event_list will be released after the wait is * performed. * This function must be encountered by all workitems * in a work-group executing the kernel with * the same num_events and event objects specified * in event_list; otherwise the results are undefined. */ void __ovld wait_group_events(int, event_t *); /** * Prefetch num_elements * sizeof(gentype) * bytes into the global cache. The prefetch * instruction is applied to a work-item in a workgroup * and does not affect the functional * behavior of the kernel. 
*/ void __ovld prefetch(const __global char *, size_t); void __ovld prefetch(const __global uchar *, size_t); void __ovld prefetch(const __global short *, size_t); void __ovld prefetch(const __global ushort *, size_t); void __ovld prefetch(const __global int *, size_t); void __ovld prefetch(const __global uint *, size_t); void __ovld prefetch(const __global long *, size_t); void __ovld prefetch(const __global ulong *, size_t); void __ovld prefetch(const __global float *, size_t); void __ovld prefetch(const __global char2 *, size_t); void __ovld prefetch(const __global uchar2 *, size_t); void __ovld prefetch(const __global short2 *, size_t); void __ovld prefetch(const __global ushort2 *, size_t); void __ovld prefetch(const __global int2 *, size_t); void __ovld prefetch(const __global uint2 *, size_t); void __ovld prefetch(const __global long2 *, size_t); void __ovld prefetch(const __global ulong2 *, size_t); void __ovld prefetch(const __global float2 *, size_t); void __ovld prefetch(const __global char3 *, size_t); void __ovld prefetch(const __global uchar3 *, size_t); void __ovld prefetch(const __global short3 *, size_t); void __ovld prefetch(const __global ushort3 *, size_t); void __ovld prefetch(const __global int3 *, size_t); void __ovld prefetch(const __global uint3 *, size_t); void __ovld prefetch(const __global long3 *, size_t); void __ovld prefetch(const __global ulong3 *, size_t); void __ovld prefetch(const __global float3 *, size_t); void __ovld prefetch(const __global char4 *, size_t); void __ovld prefetch(const __global uchar4 *, size_t); void __ovld prefetch(const __global short4 *, size_t); void __ovld prefetch(const __global ushort4 *, size_t); void __ovld prefetch(const __global int4 *, size_t); void __ovld prefetch(const __global uint4 *, size_t); void __ovld prefetch(const __global long4 *, size_t); void __ovld prefetch(const __global ulong4 *, size_t); void __ovld prefetch(const __global float4 *, size_t); void __ovld prefetch(const __global char8 
*, size_t); void __ovld prefetch(const __global uchar8 *, size_t); void __ovld prefetch(const __global short8 *, size_t); void __ovld prefetch(const __global ushort8 *, size_t); void __ovld prefetch(const __global int8 *, size_t); void __ovld prefetch(const __global uint8 *, size_t); void __ovld prefetch(const __global long8 *, size_t); void __ovld prefetch(const __global ulong8 *, size_t); void __ovld prefetch(const __global float8 *, size_t); void __ovld prefetch(const __global char16 *, size_t); void __ovld prefetch(const __global uchar16 *, size_t); void __ovld prefetch(const __global short16 *, size_t); void __ovld prefetch(const __global ushort16 *, size_t); void __ovld prefetch(const __global int16 *, size_t); void __ovld prefetch(const __global uint16 *, size_t); void __ovld prefetch(const __global long16 *, size_t); void __ovld prefetch(const __global ulong16 *, size_t); void __ovld prefetch(const __global float16 *, size_t); #ifdef cl_khr_fp64 void __ovld prefetch(const __global double *, size_t); void __ovld prefetch(const __global double2 *, size_t); void __ovld prefetch(const __global double3 *, size_t); void __ovld prefetch(const __global double4 *, size_t); void __ovld prefetch(const __global double8 *, size_t); void __ovld prefetch(const __global double16 *, size_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 void __ovld prefetch(const __global half *, size_t); void __ovld prefetch(const __global half2 *, size_t); void __ovld prefetch(const __global half3 *, size_t); void __ovld prefetch(const __global half4 *, size_t); void __ovld prefetch(const __global half8 *, size_t); void __ovld prefetch(const __global half16 *, size_t); #endif // cl_khr_fp16 // OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable #endif /** * Read the 32-bit value (referred to 
as old) * stored at location pointed by p. Compute * (old + val) and store result at location * pointed by p. The function returns old. */ int __ovld atomic_add(volatile __global int *, int); uint __ovld atomic_add(volatile __global uint *, uint); int __ovld atomic_add(volatile __local int *, int); uint __ovld atomic_add(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_add(volatile int *, int); uint __ovld atomic_add(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_add(volatile __global int *, int); uint __ovld atom_add(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_add(volatile __local int *, int); uint __ovld atom_add(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_add(volatile __global long *, long); ulong __ovld atom_add(volatile __global ulong *, ulong); long __ovld atom_add(volatile __local long *, long); ulong __ovld atom_add(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) stored at location pointed by p. * Compute (old - val) and store result at location pointed by p. The function * returns old. 
*/ int __ovld atomic_sub(volatile __global int *, int); uint __ovld atomic_sub(volatile __global uint *, uint); int __ovld atomic_sub(volatile __local int *, int); uint __ovld atomic_sub(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_sub(volatile int *, int); uint __ovld atomic_sub(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_sub(volatile __global int *, int); uint __ovld atom_sub(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_sub(volatile __local int *, int); uint __ovld atom_sub(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_sub(volatile __global long *, long); ulong __ovld atom_sub(volatile __global ulong *, ulong); long __ovld atom_sub(volatile __local long *, long); ulong __ovld atom_sub(volatile __local ulong *, ulong); #endif /** * Swaps the old value stored at location p * with new value given by val. Returns old * value. 
*/ int __ovld atomic_xchg(volatile __global int *, int); uint __ovld atomic_xchg(volatile __global uint *, uint); int __ovld atomic_xchg(volatile __local int *, int); uint __ovld atomic_xchg(volatile __local uint *, uint); float __ovld atomic_xchg(volatile __global float *, float); float __ovld atomic_xchg(volatile __local float *, float); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_xchg(volatile int *, int); uint __ovld atomic_xchg(volatile uint *, uint); float __ovld atomic_xchg(volatile float *, float); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_xchg(volatile __global int *, int); uint __ovld atom_xchg(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_xchg(volatile __local int *, int); uint __ovld atom_xchg(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_xchg(volatile __global long *, long); long __ovld atom_xchg(volatile __local long *, long); ulong __ovld atom_xchg(volatile __global ulong *, ulong); ulong __ovld atom_xchg(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old + 1) and store result at location * pointed by p. The function returns old. 
*/ int __ovld atomic_inc(volatile __global int *); uint __ovld atomic_inc(volatile __global uint *); int __ovld atomic_inc(volatile __local int *); uint __ovld atomic_inc(volatile __local uint *); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_inc(volatile int *); uint __ovld atomic_inc(volatile uint *); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_inc(volatile __global int *); uint __ovld atom_inc(volatile __global uint *); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_inc(volatile __local int *); uint __ovld atom_inc(volatile __local uint *); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_inc(volatile __global long *); ulong __ovld atom_inc(volatile __global ulong *); long __ovld atom_inc(volatile __local long *); ulong __ovld atom_inc(volatile __local ulong *); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old - 1) and store result at location * pointed by p. The function returns old. */ int __ovld atomic_dec(volatile __global int *); uint __ovld atomic_dec(volatile __global uint *); int __ovld atomic_dec(volatile __local int *); uint __ovld atomic_dec(volatile __local uint *); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_dec(volatile int *); uint __ovld atomic_dec(volatile uint *); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_dec(volatile __global int *); uint __ovld atom_dec(volatile __global uint *); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_dec(volatile __local int *); uint __ovld atom_dec(volatile __local uint *); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_dec(volatile __global long *); ulong __ovld atom_dec(volatile __global ulong *); long __ovld atom_dec(volatile __local long *); ulong __ovld atom_dec(volatile __local ulong *); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old == cmp) ? 
val : old and store result at * location pointed by p. The function * returns old. */ int __ovld atomic_cmpxchg(volatile __global int *, int, int); uint __ovld atomic_cmpxchg(volatile __global uint *, uint, uint); int __ovld atomic_cmpxchg(volatile __local int *, int, int); uint __ovld atomic_cmpxchg(volatile __local uint *, uint, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_cmpxchg(volatile int *, int, int); uint __ovld atomic_cmpxchg(volatile uint *, uint, uint); #endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_cmpxchg(volatile __global int *, int, int); uint __ovld atom_cmpxchg(volatile __global uint *, uint, uint); #endif #if defined(cl_khr_local_int32_base_atomics) int __ovld atom_cmpxchg(volatile __local int *, int, int); uint __ovld atom_cmpxchg(volatile __local uint *, uint, uint); #endif #if defined(cl_khr_int64_base_atomics) long __ovld atom_cmpxchg(volatile __global long *, long, long); ulong __ovld atom_cmpxchg(volatile __global ulong *, ulong, ulong); long __ovld atom_cmpxchg(volatile __local long *, long, long); ulong __ovld atom_cmpxchg(volatile __local ulong *, ulong, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * min(old, val) and store minimum value at * location pointed by p. The function * returns old. 
*/ int __ovld atomic_min(volatile __global int *, int); uint __ovld atomic_min(volatile __global uint *, uint); int __ovld atomic_min(volatile __local int *, int); uint __ovld atomic_min(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_min(volatile int *, int); uint __ovld atomic_min(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_min(volatile __global int *, int); uint __ovld atom_min(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_min(volatile __local int *, int); uint __ovld atom_min(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_min(volatile __global long *, long); ulong __ovld atom_min(volatile __global ulong *, ulong); long __ovld atom_min(volatile __local long *, long); ulong __ovld atom_min(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * max(old, val) and store maximum value at * location pointed by p. The function * returns old. 
*/ int __ovld atomic_max(volatile __global int *, int); uint __ovld atomic_max(volatile __global uint *, uint); int __ovld atomic_max(volatile __local int *, int); uint __ovld atomic_max(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_max(volatile int *, int); uint __ovld atomic_max(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_max(volatile __global int *, int); uint __ovld atom_max(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_max(volatile __local int *, int); uint __ovld atom_max(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_max(volatile __global long *, long); ulong __ovld atom_max(volatile __global ulong *, ulong); long __ovld atom_max(volatile __local long *, long); ulong __ovld atom_max(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old & val) and store result at location * pointed by p. The function returns old. 
*/ int __ovld atomic_and(volatile __global int *, int); uint __ovld atomic_and(volatile __global uint *, uint); int __ovld atomic_and(volatile __local int *, int); uint __ovld atomic_and(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_and(volatile int *, int); uint __ovld atomic_and(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_and(volatile __global int *, int); uint __ovld atom_and(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_and(volatile __local int *, int); uint __ovld atom_and(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_and(volatile __global long *, long); ulong __ovld atom_and(volatile __global ulong *, ulong); long __ovld atom_and(volatile __local long *, long); ulong __ovld atom_and(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old | val) and store result at location * pointed by p. The function returns old. 
*/ int __ovld atomic_or(volatile __global int *, int); uint __ovld atomic_or(volatile __global uint *, uint); int __ovld atomic_or(volatile __local int *, int); uint __ovld atomic_or(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_or(volatile int *, int); uint __ovld atomic_or(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_or(volatile __global int *, int); uint __ovld atom_or(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_or(volatile __local int *, int); uint __ovld atom_or(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_or(volatile __global long *, long); ulong __ovld atom_or(volatile __global ulong *, ulong); long __ovld atom_or(volatile __local long *, long); ulong __ovld atom_or(volatile __local ulong *, ulong); #endif /** * Read the 32-bit value (referred to as old) * stored at location pointed by p. Compute * (old ^ val) and store result at location * pointed by p. The function returns old. 
*/ int __ovld atomic_xor(volatile __global int *, int); uint __ovld atomic_xor(volatile __global uint *, uint); int __ovld atomic_xor(volatile __local int *, int); uint __ovld atomic_xor(volatile __local uint *, uint); #ifdef __OPENCL_CPP_VERSION__ int __ovld atomic_xor(volatile int *, int); uint __ovld atomic_xor(volatile uint *, uint); #endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_xor(volatile __global int *, int); uint __ovld atom_xor(volatile __global uint *, uint); #endif #if defined(cl_khr_local_int32_extended_atomics) int __ovld atom_xor(volatile __local int *, int); uint __ovld atom_xor(volatile __local uint *, uint); #endif #if defined(cl_khr_int64_extended_atomics) long __ovld atom_xor(volatile __global long *, long); ulong __ovld atom_xor(volatile __global ulong *, ulong); long __ovld atom_xor(volatile __local long *, long); ulong __ovld atom_xor(volatile __local ulong *, ulong); #endif #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable #endif // OpenCL v2.0 s6.13.11 - Atomics Functions #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable #endif // atomic_init() #if defined(__opencl_c_generic_address_space) void __ovld atomic_init(volatile atomic_int *, int); void __ovld atomic_init(volatile atomic_uint *, uint); void __ovld atomic_init(volatile atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) void __ovld atomic_init(volatile atomic_long *, long); void __ovld atomic_init(volatile 
atomic_ulong *, ulong); #ifdef cl_khr_fp64 void __ovld atomic_init(volatile atomic_double *, double); #endif //cl_khr_fp64 #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_init(volatile __global atomic_int *, int); void __ovld atomic_init(volatile __local atomic_int *, int); void __ovld atomic_init(volatile __global atomic_uint *, uint); void __ovld atomic_init(volatile __local atomic_uint *, uint); void __ovld atomic_init(volatile __global atomic_float *, float); void __ovld atomic_init(volatile __local atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) void __ovld atomic_init(volatile __global atomic_long *, long); void __ovld atomic_init(volatile __local atomic_long *, long); void __ovld atomic_init(volatile __global atomic_ulong *, ulong); void __ovld atomic_init(volatile __local atomic_ulong *, ulong); #ifdef cl_khr_fp64 void __ovld atomic_init(volatile __global atomic_double *, double); void __ovld atomic_init(volatile __local atomic_double *, double); #endif //cl_khr_fp64 #endif #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_work_item_fence() void __ovld atomic_work_item_fence(cl_mem_fence_flags, memory_order, memory_scope); // atomic_fetch() // OpenCL v2.0 s6.13.11.7.5: // add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t. 
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_fetch_add(volatile atomic_int *, int); uint __ovld atomic_fetch_add(volatile atomic_uint *, uint); int __ovld atomic_fetch_sub(volatile atomic_int *, int); uint __ovld atomic_fetch_sub(volatile atomic_uint *, uint); int __ovld atomic_fetch_or(volatile atomic_int *, int); uint __ovld atomic_fetch_or(volatile atomic_uint *, uint); int __ovld atomic_fetch_xor(volatile atomic_int *, int); uint __ovld atomic_fetch_xor(volatile atomic_uint *, uint); int __ovld atomic_fetch_and(volatile atomic_int *, int); uint __ovld atomic_fetch_and(volatile atomic_uint *, uint); int __ovld atomic_fetch_min(volatile atomic_int *, int); uint __ovld atomic_fetch_min(volatile atomic_uint *, uint); int __ovld atomic_fetch_max(volatile atomic_int *, int); uint __ovld atomic_fetch_max(volatile atomic_uint *, uint); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add(volatile atomic_long *, long); ulong __ovld atomic_fetch_add(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_sub(volatile atomic_long *, long); ulong __ovld atomic_fetch_sub(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_or(volatile atomic_long *, long); ulong __ovld atomic_fetch_or(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_xor(volatile atomic_long *, long); ulong __ovld atomic_fetch_xor(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_and(volatile atomic_long *, long); ulong __ovld atomic_fetch_and(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_min(volatile atomic_long *, long); ulong __ovld atomic_fetch_min(volatile atomic_ulong *, ulong); long __ovld atomic_fetch_max(volatile atomic_long *, long); ulong __ovld atomic_fetch_max(volatile atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *, ptrdiff_t); uintptr_t __ovld 
atomic_fetch_sub(volatile atomic_uintptr_t *, ptrdiff_t); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_fetch_add(volatile __global atomic_int *, int); int __ovld atomic_fetch_add(volatile __local atomic_int *, int); uint __ovld atomic_fetch_add(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_add(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_sub(volatile __global atomic_int *, int); int __ovld atomic_fetch_sub(volatile __local atomic_int *, int); uint __ovld atomic_fetch_sub(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_sub(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_or(volatile __global atomic_int *, int); int __ovld atomic_fetch_or(volatile __local atomic_int *, int); uint __ovld atomic_fetch_or(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_or(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_xor(volatile __global atomic_int *, int); int __ovld atomic_fetch_xor(volatile __local atomic_int *, int); uint __ovld atomic_fetch_xor(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_xor(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_and(volatile __global atomic_int *, int); int __ovld atomic_fetch_and(volatile __local atomic_int *, int); uint __ovld atomic_fetch_and(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_and(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_min(volatile __global atomic_int *, int); int __ovld atomic_fetch_min(volatile __local atomic_int *, int); uint __ovld atomic_fetch_min(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_min(volatile __local atomic_uint *, uint); int __ovld atomic_fetch_max(volatile __global atomic_int *, int); int __ovld atomic_fetch_max(volatile __local 
atomic_int *, int); uint __ovld atomic_fetch_max(volatile __global atomic_uint *, uint); uint __ovld atomic_fetch_max(volatile __local atomic_uint *, uint); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add(volatile __global atomic_long *, long); long __ovld atomic_fetch_add(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_add(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_add(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_add(volatile __global atomic_uintptr_t *, ptrdiff_t); uintptr_t __ovld atomic_fetch_add(volatile __local atomic_uintptr_t *, ptrdiff_t); long __ovld atomic_fetch_sub(volatile __global atomic_long *, long); long __ovld atomic_fetch_sub(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_sub(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_sub(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_sub(volatile __global atomic_uintptr_t *, ptrdiff_t); uintptr_t __ovld atomic_fetch_sub(volatile __local atomic_uintptr_t *, ptrdiff_t); long __ovld atomic_fetch_or(volatile __global atomic_long *, long); long __ovld atomic_fetch_or(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_or(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_or(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_or(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_or(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_or(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_or(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_xor(volatile __global atomic_long *, long); long __ovld atomic_fetch_xor(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_xor(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_xor(volatile __local 
atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_xor(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_xor(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_xor(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_xor(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_and(volatile __global atomic_long *, long); long __ovld atomic_fetch_and(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_and(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_and(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_and(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_and(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_and(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_and(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_min(volatile __global atomic_long *, long); long __ovld atomic_fetch_min(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_min(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_min(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_min(volatile __global atomic_uintptr_t *, intptr_t); uintptr_t __ovld atomic_fetch_min(volatile __local atomic_uintptr_t *, intptr_t); intptr_t __ovld atomic_fetch_min(volatile __global atomic_intptr_t *, uintptr_t); intptr_t __ovld atomic_fetch_min(volatile __local atomic_intptr_t *, uintptr_t); long __ovld atomic_fetch_max(volatile __global atomic_long *, long); long __ovld atomic_fetch_max(volatile __local atomic_long *, long); ulong __ovld atomic_fetch_max(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_fetch_max(volatile __local atomic_ulong *, ulong); uintptr_t __ovld atomic_fetch_max(volatile __global atomic_uintptr_t *, uintptr_t); uintptr_t __ovld atomic_fetch_max(volatile __local atomic_uintptr_t 
*, uintptr_t); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_fetch_add_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_sub_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_or_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_xor_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_and_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_min_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *, uint, memory_order); int __ovld atomic_fetch_max_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *, uint, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_sub_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_or_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *, ulong, 
memory_order); long __ovld atomic_fetch_xor_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_and_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_min_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *, ulong, memory_order); long __ovld atomic_fetch_max_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order); uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_fetch_add_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_add_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_add_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_add_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_sub_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_sub_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_sub_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_sub_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_or_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_or_explicit(volatile __local atomic_int *, int, memory_order); 
uint __ovld atomic_fetch_or_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_or_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_xor_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_xor_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_xor_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_xor_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_and_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_and_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_and_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_and_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_min_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_min_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_min_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_min_explicit(volatile __local atomic_uint *, uint, memory_order); int __ovld atomic_fetch_max_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_fetch_max_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_fetch_max_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_fetch_max_explicit(volatile __local atomic_uint *, uint, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_add_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_add_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld 
atomic_fetch_add_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order); uintptr_t __ovld atomic_fetch_add_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order); long __ovld atomic_fetch_sub_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_sub_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_sub_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_sub_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order); long __ovld atomic_fetch_or_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_or_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_or_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_or_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_xor_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_xor_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_xor_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_xor_explicit(volatile __local 
atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_and_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_and_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_and_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_and_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_min_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_min_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_min_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_min_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order); uintptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order); intptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order); intptr_t __ovld atomic_fetch_min_explicit(volatile __local 
atomic_intptr_t *, uintptr_t, memory_order); long __ovld atomic_fetch_max_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_fetch_max_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_fetch_max_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_fetch_max_explicit(volatile __local atomic_ulong *, ulong, memory_order); uintptr_t __ovld atomic_fetch_max_explicit(volatile __global atomic_uintptr_t *, uintptr_t, memory_order); uintptr_t __ovld atomic_fetch_max_explicit(volatile __local atomic_uintptr_t *, uintptr_t, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) int __ovld atomic_fetch_add_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_sub_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_or_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_xor_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_and_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_min_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); 
int __ovld atomic_fetch_max_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_sub_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_or_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_xor_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_and_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_min_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_max_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || 
__OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_fetch_add_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_add_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_add_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_add_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_sub_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_sub_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_sub_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_sub_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_or_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_or_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_or_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_or_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_xor_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_xor_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_xor_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_xor_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_and_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_and_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_and_explicit(volatile __global atomic_uint *, uint, memory_order, 
memory_scope); uint __ovld atomic_fetch_and_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_min_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_min_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_min_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_min_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); int __ovld atomic_fetch_max_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_fetch_max_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_fetch_max_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_fetch_max_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_add_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_add_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); ulong __ovld atomic_fetch_add_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_add_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); long __ovld atomic_fetch_sub_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_sub_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_sub_explicit(volatile __global atomic_ulong *, ulong, 
memory_order, memory_scope); ulong __ovld atomic_fetch_sub_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __global atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *, ptrdiff_t, memory_order, memory_scope); long __ovld atomic_fetch_or_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_or_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_or_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_or_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_xor_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_xor_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_xor_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_xor_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_xor_explicit(volatile 
__global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_and_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_and_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_and_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_and_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long __ovld atomic_fetch_min_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_min_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_min_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_min_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_uintptr_t *, intptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_uintptr_t *, intptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_intptr_t *, uintptr_t, memory_order, memory_scope); intptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_intptr_t *, uintptr_t, memory_order, memory_scope); long 
__ovld atomic_fetch_max_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_fetch_max_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_fetch_max_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_fetch_max_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_max_explicit(volatile __global atomic_uintptr_t *, uintptr_t, memory_order, memory_scope); uintptr_t __ovld atomic_fetch_max_explicit(volatile __local atomic_uintptr_t *, uintptr_t, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // The functionality added by cl_ext_float_atomics extension #if defined(cl_ext_float_atomics) #if defined(__opencl_c_ext_fp16_global_atomic_load_store) void __ovld atomic_store(volatile __global atomic_half *, half); void __ovld atomic_store_explicit(volatile __global atomic_half *, half, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); half __ovld atomic_load(volatile __global atomic_half *); half __ovld atomic_load_explicit(volatile __global atomic_half *, memory_order); half __ovld atomic_load_explicit(volatile __global atomic_half *, memory_order, memory_scope); half __ovld atomic_exchange(volatile __global atomic_half *, half); half __ovld atomic_exchange_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_exchange_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) #if defined(__opencl_c_ext_fp16_local_atomic_load_store) void __ovld atomic_store(volatile __local atomic_half *, half); void __ovld atomic_store_explicit(volatile __local atomic_half *, half, 
memory_order); void __ovld atomic_store_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); half __ovld atomic_load(volatile __local atomic_half *); half __ovld atomic_load_explicit(volatile __local atomic_half *, memory_order); half __ovld atomic_load_explicit(volatile __local atomic_half *, memory_order, memory_scope); half __ovld atomic_exchange(volatile __local atomic_half *, half); half __ovld atomic_exchange_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_exchange_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_local_atomic_load_store) #if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \ defined(__opencl_c_ext_fp16_local_atomic_load_store) void __ovld atomic_store(volatile atomic_half *, half); void __ovld atomic_store_explicit(volatile atomic_half *, half, memory_order); void __ovld atomic_store_explicit(volatile atomic_half *, half, memory_order, memory_scope); half __ovld atomic_load(volatile atomic_half *); half __ovld atomic_load_explicit(volatile atomic_half *, memory_order); half __ovld atomic_load_explicit(volatile atomic_half *, memory_order, memory_scope); half __ovld atomic_exchange(volatile atomic_half *, half); half __ovld atomic_exchange_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_exchange_explicit(volatile atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) && // defined(__opencl_c_ext_fp16_local_atomic_load_store) #if defined(__opencl_c_ext_fp16_global_atomic_min_max) half __ovld atomic_fetch_min(volatile __global atomic_half *, half); half __ovld atomic_fetch_max(volatile __global atomic_half *, half); half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *, half, memory_order); half __ovld 
atomic_fetch_min_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) #if defined(__opencl_c_ext_fp16_local_atomic_min_max) half __ovld atomic_fetch_min(volatile __local atomic_half *, half); half __ovld atomic_fetch_max(volatile __local atomic_half *, half); half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_local_atomic_min_max) #if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ defined(__opencl_c_ext_fp16_local_atomic_min_max) half __ovld atomic_fetch_min(volatile atomic_half *, half); half __ovld atomic_fetch_max(volatile atomic_half *, half); half __ovld atomic_fetch_min_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_max_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_min_explicit(volatile atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_max_explicit(volatile atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) && \ defined(__opencl_c_ext_fp16_local_atomic_min_max) #if defined(__opencl_c_ext_fp32_global_atomic_min_max) float __ovld atomic_fetch_min(volatile __global atomic_float *, float); float __ovld atomic_fetch_max(volatile __global atomic_float *, float); float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *, float, memory_order); 
float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) #if defined(__opencl_c_ext_fp32_local_atomic_min_max) float __ovld atomic_fetch_min(volatile __local atomic_float *, float); float __ovld atomic_fetch_max(volatile __local atomic_float *, float); float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) #if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ defined(__opencl_c_ext_fp32_local_atomic_min_max) float __ovld atomic_fetch_min(volatile atomic_float *, float); float __ovld atomic_fetch_max(volatile atomic_float *, float); float __ovld atomic_fetch_min_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_max_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_min_explicit(volatile atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_max_explicit(volatile atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \ defined(__opencl_c_ext_fp32_local_atomic_min_max) #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #if defined(__opencl_c_ext_fp64_global_atomic_min_max) double __ovld atomic_fetch_min(volatile __global atomic_double *, double); double __ovld atomic_fetch_max(volatile __global atomic_double *, double); double __ovld atomic_fetch_min_explicit(volatile 
__global atomic_double *, double, memory_order); double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) #if defined(__opencl_c_ext_fp64_local_atomic_min_max) double __ovld atomic_fetch_min(volatile __local atomic_double *, double); double __ovld atomic_fetch_max(volatile __local atomic_double *, double); double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) #if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ defined(__opencl_c_ext_fp64_local_atomic_min_max) double __ovld atomic_fetch_min(volatile atomic_double *, double); double __ovld atomic_fetch_max(volatile atomic_double *, double); double __ovld atomic_fetch_min_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_max_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_min_explicit(volatile atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_max_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \ defined(__opencl_c_ext_fp64_local_atomic_min_max) #endif // defined(cl_khr_int64_base_atomics) && \ defined(cl_khr_int64_extended_atomics) #if defined(__opencl_c_ext_fp16_global_atomic_add) half 
__ovld atomic_fetch_add(volatile __global atomic_half *, half); half __ovld atomic_fetch_sub(volatile __global atomic_half *, half); half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *, half, memory_order); half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_add) #if defined(__opencl_c_ext_fp16_local_atomic_add) half __ovld atomic_fetch_add(volatile __local atomic_half *, half); half __ovld atomic_fetch_sub(volatile __local atomic_half *, half); half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *, half, memory_order); half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_local_atomic_add) #if defined(__opencl_c_ext_fp16_global_atomic_add) && \ defined(__opencl_c_ext_fp16_local_atomic_add) half __ovld atomic_fetch_add(volatile atomic_half *, half); half __ovld atomic_fetch_sub(volatile atomic_half *, half); half __ovld atomic_fetch_add_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_sub_explicit(volatile atomic_half *, half, memory_order); half __ovld atomic_fetch_add_explicit(volatile atomic_half *, half, memory_order, memory_scope); half __ovld atomic_fetch_sub_explicit(volatile atomic_half *, half, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \ defined(__opencl_c_ext_fp16_local_atomic_add) #if defined(__opencl_c_ext_fp32_global_atomic_add) float __ovld atomic_fetch_add(volatile 
__global atomic_float *, float); float __ovld atomic_fetch_sub(volatile __global atomic_float *, float); float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_add) #if defined(__opencl_c_ext_fp32_local_atomic_add) float __ovld atomic_fetch_add(volatile __local atomic_float *, float); float __ovld atomic_fetch_sub(volatile __local atomic_float *, float); float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *, float, memory_order); float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_local_atomic_add) #if defined(__opencl_c_ext_fp32_global_atomic_add) && \ defined(__opencl_c_ext_fp32_local_atomic_add) float __ovld atomic_fetch_add(volatile atomic_float *, float); float __ovld atomic_fetch_sub(volatile atomic_float *, float); float __ovld atomic_fetch_add_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_sub_explicit(volatile atomic_float *, float, memory_order); float __ovld atomic_fetch_add_explicit(volatile atomic_float *, float, memory_order, memory_scope); float __ovld atomic_fetch_sub_explicit(volatile atomic_float *, float, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \ defined(__opencl_c_ext_fp32_local_atomic_add) #if defined(cl_khr_int64_base_atomics) && 
defined(cl_khr_int64_extended_atomics) #if defined(__opencl_c_ext_fp64_global_atomic_add) double __ovld atomic_fetch_add(volatile __global atomic_double *, double); double __ovld atomic_fetch_sub(volatile __global atomic_double *, double); double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_add) #if defined(__opencl_c_ext_fp64_local_atomic_add) double __ovld atomic_fetch_add(volatile __local atomic_double *, double); double __ovld atomic_fetch_sub(volatile __local atomic_double *, double); double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *, double, memory_order); double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif // defined(__opencl_c_ext_fp64_local_atomic_add) #if defined(__opencl_c_ext_fp64_global_atomic_add) && \ defined(__opencl_c_ext_fp64_local_atomic_add) double __ovld atomic_fetch_add(volatile atomic_double *, double); double __ovld atomic_fetch_sub(volatile atomic_double *, double); double __ovld atomic_fetch_add_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_sub_explicit(volatile atomic_double *, double, memory_order); double __ovld atomic_fetch_add_explicit(volatile atomic_double *, double, memory_order, memory_scope); double __ovld atomic_fetch_sub_explicit(volatile atomic_double *, double, memory_order, 
memory_scope); #endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \ defined(__opencl_c_ext_fp64_local_atomic_add) #endif // defined(cl_khr_int64_base_atomics) && \ defined(cl_khr_int64_extended_atomics) #endif // cl_ext_float_atomics // atomic_store() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) void __ovld atomic_store(volatile atomic_int *, int); void __ovld atomic_store(volatile atomic_uint *, uint); void __ovld atomic_store(volatile atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store(volatile atomic_double *, double); #endif //cl_khr_fp64 void __ovld atomic_store(volatile atomic_long *, long); void __ovld atomic_store(volatile atomic_ulong *, ulong); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_store(volatile __global atomic_int *, int); void __ovld atomic_store(volatile __local atomic_int *, int); void __ovld atomic_store(volatile __global atomic_uint *, uint); void __ovld atomic_store(volatile __local atomic_uint *, uint); void __ovld atomic_store(volatile __global atomic_float *, float); void __ovld atomic_store(volatile __local atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store(volatile __global atomic_double *, double); void __ovld atomic_store(volatile __local atomic_double *, double); #endif //cl_khr_fp64 void __ovld atomic_store(volatile __global atomic_long *, long); void __ovld atomic_store(volatile __local atomic_long *, long); void __ovld atomic_store(volatile __global atomic_ulong *, ulong); void __ovld atomic_store(volatile __local atomic_ulong *, ulong); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // 
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) void __ovld atomic_store_explicit(volatile atomic_int *, int, memory_order); void __ovld atomic_store_explicit(volatile atomic_uint *, uint, memory_order); void __ovld atomic_store_explicit(volatile atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_double *, double, memory_order); #endif //cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_long *, long, memory_order); void __ovld atomic_store_explicit(volatile atomic_ulong *, ulong, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_store_explicit(volatile __global atomic_int *, int, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_int *, int, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_uint *, uint, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_uint *, uint, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_float *, float, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile __global atomic_double *, double, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_double *, double, memory_order); #endif void __ovld atomic_store_explicit(volatile __global atomic_long *, long, memory_order); void __ovld atomic_store_explicit(volatile __local atomic_long *, long, memory_order); void __ovld atomic_store_explicit(volatile __global atomic_ulong *, ulong, memory_order); void 
__ovld atomic_store_explicit(volatile __local atomic_ulong *, ulong, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) void __ovld atomic_store_explicit(volatile atomic_int *, int, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 void __ovld atomic_store_explicit(volatile atomic_long *, long, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) void __ovld atomic_store_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local 
atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 void __ovld atomic_store_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); void __ovld atomic_store_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_load() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_load(volatile atomic_int *); uint __ovld atomic_load(volatile atomic_uint *); float __ovld atomic_load(volatile atomic_float *); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load(volatile atomic_double *); #endif //cl_khr_fp64 long __ovld atomic_load(volatile atomic_long *); ulong __ovld atomic_load(volatile atomic_ulong *); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_load(volatile __global atomic_int *); int __ovld atomic_load(volatile __local atomic_int *); uint __ovld atomic_load(volatile __global atomic_uint *); uint __ovld atomic_load(volatile __local atomic_uint *); float __ovld atomic_load(volatile __global atomic_float *); float __ovld atomic_load(volatile __local atomic_float *); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load(volatile __global atomic_double *); double __ovld atomic_load(volatile __local atomic_double *); #endif //cl_khr_fp64 long __ovld atomic_load(volatile __global 
atomic_long *); long __ovld atomic_load(volatile __local atomic_long *); ulong __ovld atomic_load(volatile __global atomic_ulong *); ulong __ovld atomic_load(volatile __local atomic_ulong *); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_load_explicit(volatile atomic_int *, memory_order); uint __ovld atomic_load_explicit(volatile atomic_uint *, memory_order); float __ovld atomic_load_explicit(volatile atomic_float *, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile atomic_double *, memory_order); #endif //cl_khr_fp64 long __ovld atomic_load_explicit(volatile atomic_long *, memory_order); ulong __ovld atomic_load_explicit(volatile atomic_ulong *, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_load_explicit(volatile __global atomic_int *, memory_order); int __ovld atomic_load_explicit(volatile __local atomic_int *, memory_order); uint __ovld atomic_load_explicit(volatile __global atomic_uint *, memory_order); uint __ovld atomic_load_explicit(volatile __local atomic_uint *, memory_order); float __ovld atomic_load_explicit(volatile __global atomic_float *, memory_order); float __ovld atomic_load_explicit(volatile __local atomic_float *, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile __global atomic_double *, memory_order); double __ovld atomic_load_explicit(volatile __local atomic_double *, memory_order); #endif //cl_khr_fp64 long __ovld atomic_load_explicit(volatile __global atomic_long *, 
memory_order); long __ovld atomic_load_explicit(volatile __local atomic_long *, memory_order); ulong __ovld atomic_load_explicit(volatile __global atomic_ulong *, memory_order); ulong __ovld atomic_load_explicit(volatile __local atomic_ulong *, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) int __ovld atomic_load_explicit(volatile atomic_int *, memory_order, memory_scope); uint __ovld atomic_load_explicit(volatile atomic_uint *, memory_order, memory_scope); float __ovld atomic_load_explicit(volatile atomic_float *, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile atomic_double *, memory_order, memory_scope); #endif //cl_khr_fp64 long __ovld atomic_load_explicit(volatile atomic_long *, memory_order, memory_scope); ulong __ovld atomic_load_explicit(volatile atomic_ulong *, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_load_explicit(volatile __global atomic_int *, memory_order, memory_scope); int __ovld atomic_load_explicit(volatile __local atomic_int *, memory_order, memory_scope); uint __ovld atomic_load_explicit(volatile __global atomic_uint *, memory_order, memory_scope); uint __ovld atomic_load_explicit(volatile __local atomic_uint *, memory_order, memory_scope); float __ovld atomic_load_explicit(volatile __global atomic_float *, memory_order, memory_scope); float __ovld atomic_load_explicit(volatile __local atomic_float *, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load_explicit(volatile __global 
atomic_double *, memory_order, memory_scope); double __ovld atomic_load_explicit(volatile __local atomic_double *, memory_order, memory_scope); #endif long __ovld atomic_load_explicit(volatile __global atomic_long *, memory_order, memory_scope); long __ovld atomic_load_explicit(volatile __local atomic_long *, memory_order, memory_scope); ulong __ovld atomic_load_explicit(volatile __global atomic_ulong *, memory_order, memory_scope); ulong __ovld atomic_load_explicit(volatile __local atomic_ulong *, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_exchange() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_exchange(volatile atomic_int *, int); uint __ovld atomic_exchange(volatile atomic_uint *, uint); float __ovld atomic_exchange(volatile atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange(volatile atomic_double *, double); #endif //cl_khr_fp64 long __ovld atomic_exchange(volatile atomic_long *, long); ulong __ovld atomic_exchange(volatile atomic_ulong *, ulong); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_exchange(volatile __global atomic_int *, int); int __ovld atomic_exchange(volatile __local atomic_int *, int); uint __ovld atomic_exchange(volatile __global atomic_uint *, uint); uint __ovld atomic_exchange(volatile __local atomic_uint *, uint); float __ovld atomic_exchange(volatile __global atomic_float *, float); float __ovld atomic_exchange(volatile __local atomic_float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double 
__ovld atomic_exchange(volatile __global atomic_double *, double); double __ovld atomic_exchange(volatile __local atomic_double *, double); #endif //cl_khr_fp64 long __ovld atomic_exchange(volatile __global atomic_long *, long); long __ovld atomic_exchange(volatile __local atomic_long *, long); ulong __ovld atomic_exchange(volatile __global atomic_ulong *, ulong); ulong __ovld atomic_exchange(volatile __local atomic_ulong *, ulong); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) int __ovld atomic_exchange_explicit(volatile atomic_int *, int, memory_order); uint __ovld atomic_exchange_explicit(volatile atomic_uint *, uint, memory_order); float __ovld atomic_exchange_explicit(volatile atomic_float *, float, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile atomic_double *, double, memory_order); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile atomic_long *, long, memory_order); ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *, ulong, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_exchange_explicit(volatile __global atomic_int *, int, memory_order); int __ovld atomic_exchange_explicit(volatile __local atomic_int *, int, memory_order); uint __ovld atomic_exchange_explicit(volatile __global atomic_uint *, uint, memory_order); uint __ovld atomic_exchange_explicit(volatile __local atomic_uint *, uint, memory_order); float __ovld atomic_exchange_explicit(volatile __global atomic_float *, float, memory_order); float __ovld atomic_exchange_explicit(volatile __local atomic_float *, float, 
memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile __global atomic_double *, double, memory_order); double __ovld atomic_exchange_explicit(volatile __local atomic_double *, double, memory_order); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile __global atomic_long *, long, memory_order); long __ovld atomic_exchange_explicit(volatile __local atomic_long *, long, memory_order); ulong __ovld atomic_exchange_explicit(volatile __global atomic_ulong *, ulong, memory_order); ulong __ovld atomic_exchange_explicit(volatile __local atomic_ulong *, ulong, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)wi #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) int __ovld atomic_exchange_explicit(volatile atomic_int *, int, memory_order, memory_scope); uint __ovld atomic_exchange_explicit(volatile atomic_uint *, uint, memory_order, memory_scope); float __ovld atomic_exchange_explicit(volatile atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *, ulong, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) int __ovld atomic_exchange_explicit(volatile __global atomic_int *, int, memory_order, memory_scope); int __ovld atomic_exchange_explicit(volatile __local atomic_int *, int, memory_order, memory_scope); uint __ovld 
atomic_exchange_explicit(volatile __global atomic_uint *, uint, memory_order, memory_scope); uint __ovld atomic_exchange_explicit(volatile __local atomic_uint *, uint, memory_order, memory_scope); float __ovld atomic_exchange_explicit(volatile __global atomic_float *, float, memory_order, memory_scope); float __ovld atomic_exchange_explicit(volatile __local atomic_float *, float, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange_explicit(volatile __global atomic_double *, double, memory_order, memory_scope); double __ovld atomic_exchange_explicit(volatile __local atomic_double *, double, memory_order, memory_scope); #endif //cl_khr_fp64 long __ovld atomic_exchange_explicit(volatile __global atomic_long *, long, memory_order, memory_scope); long __ovld atomic_exchange_explicit(volatile __local atomic_long *, long, memory_order, memory_scope); ulong __ovld atomic_exchange_explicit(volatile __global atomic_ulong *, ulong, memory_order, memory_scope); ulong __ovld atomic_exchange_explicit(volatile __local atomic_ulong *, ulong, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_compare_exchange_strong(volatile atomic_int *, int *, int); bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *, uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile atomic_int *, int *, int); bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *, uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile atomic_float *, float *, float); bool __ovld 
atomic_compare_exchange_weak(volatile atomic_float *, float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile atomic_double *, double *, double); bool __ovld atomic_compare_exchange_weak(volatile atomic_double *, double *, double); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile atomic_long *, long *, long); bool __ovld atomic_compare_exchange_weak(volatile atomic_long *, long *, long); bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *, ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *, ulong *, ulong); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *, __private 
uint *, uint); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *, __private float *, float); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *, __private float *, float); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *, __global int *, int); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *, __local int *, int); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *, __private int *, int); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *, __global uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *, __local uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *, __private uint *, uint); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *, __local float *, float); bool __ovld 
atomic_compare_exchange_weak(volatile __global atomic_float *, __private float *, float); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *, __global float *, float); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *, __local float *, float); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *, __private float *, float); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *, __private double *, double); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *, __private double *, double); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *, __private double *, double); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *, __global double *, double); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *, __local double *, double); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *, __private double *, double); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *, __local long *, long); bool __ovld 
atomic_compare_exchange_strong(volatile __global atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *, __private ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *, __private ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *, __global long *, long); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *, __local long *, long); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *, __private long *, long); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *, __global ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *, __private ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *, __global ulong *, ulong); bool 
__ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *, __local ulong *, ulong); bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *, __private ulong *, ulong); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *, int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *, int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *, float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *, float *, float, memory_order, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *, double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *, double *, double, memory_order, memory_order); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *, long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *, long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *, ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *, ulong *, ulong, 
memory_order, memory_order); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global 
atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order); bool __ovld 
atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __private double *, 
double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); 
bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, 
__local ulong *, ulong, memory_order, memory_order); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif //defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *, int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *, int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *, uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *, float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *, float *, float, memory_order, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *, double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *, double *, double, memory_order, memory_order, memory_scope); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *, long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *, long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *, ulong *, 
ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *, ulong *, ulong, memory_order, memory_order, memory_scope); #endif #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld 
atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __global int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __local int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *, __private int *, int, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile 
__global atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __global uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __local uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *, __private uint *, uint, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __global float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __local float *, float, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *, __private float *, float, memory_order, memory_order, memory_scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); 
bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __global double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __local double *, double, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *, __private double *, double, memory_order, memory_order, memory_scope); #endif //cl_khr_fp64 bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __local long *, long, 
memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *, __private long *, long, memory_order, memory_order, 
memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __global long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __local long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *, __private long *, long, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __global ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __local ulong *, ulong, memory_order, memory_order, memory_scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *, __private ulong *, ulong, memory_order, memory_order, memory_scope); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) // atomic_flag_test_and_set() and atomic_flag_clear() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_flag_test_and_set(volatile atomic_flag *); void __ovld atomic_flag_clear(volatile atomic_flag *); #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld 
atomic_flag_test_and_set(volatile __global atomic_flag *); bool __ovld atomic_flag_test_and_set(volatile __local atomic_flag *); void __ovld atomic_flag_clear(volatile __global atomic_flag *); void __ovld atomic_flag_clear(volatile __local atomic_flag *); #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_atomic_scope_device) #if defined(__opencl_c_generic_address_space) bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *, memory_order); #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_flag_test_and_set_explicit(volatile __global atomic_flag *, memory_order); bool __ovld atomic_flag_test_and_set_explicit(volatile __local atomic_flag *, memory_order); void __ovld atomic_flag_clear_explicit(volatile __global atomic_flag *, memory_order); void __ovld atomic_flag_clear_explicit(volatile __local atomic_flag *, memory_order); #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif #if defined(__opencl_c_generic_address_space) bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order, memory_scope); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *, memory_order, memory_scope); #endif //defined(__opencl_c_generic_address_space) #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) bool __ovld atomic_flag_test_and_set_explicit(volatile __global atomic_flag *, memory_order, memory_scope); bool __ovld atomic_flag_test_and_set_explicit(volatile __local atomic_flag *, memory_order, memory_scope); void __ovld atomic_flag_clear_explicit(volatile __global atomic_flag *, memory_order, memory_scope); void __ovld atomic_flag_clear_explicit(volatile __local atomic_flag *, memory_order, memory_scope); #endif // 
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100) #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions /** * The shuffle and shuffle2 built-in functions construct * a permutation of elements from one or two input * vectors respectively that are of the same type, * returning a vector with the same element type as the * input and length that is the same as the shuffle mask. * The size of each element in the mask must match the * size of each element in the result. For shuffle, only * the ilogb(2m-1) least significant bits of each mask * element are considered. For shuffle2, only the * ilogb(2m-1)+1 least significant bits of each mask * element are considered. Other bits in the mask shall * be ignored. * The elements of the input vectors are numbered from * left to right across one or both of the vectors. For this * purpose, the number of elements in a vector is given * by vec_step(gentypem). The shuffle mask operand * specifies, for each element of the result vector, which * element of the one or two input vectors the result * element gets. 
* Examples: * uint4 mask = (uint4)(3, 2, * 1, 0); * float4 a; * float4 r = shuffle(a, mask); * // r.s0123 = a.wzyx * uint8 mask = (uint8)(0, 1, 2, 3, * 4, 5, 6, 7); * float4 a, b; * float8 r = shuffle2(a, b, mask); * // r.s0123 = a.xyzw * // r.s4567 = b.xyzw * uint4 mask; * float8 a; * float4 b; * b = shuffle(a, mask); * Examples that are not valid are: * uint8 mask; * short16 a; * short8 b; * b = shuffle(a, mask); <- not valid */ char2 __ovld __cnfn shuffle(char2, uchar2); char2 __ovld __cnfn shuffle(char4, uchar2); char2 __ovld __cnfn shuffle(char8, uchar2); char2 __ovld __cnfn shuffle(char16, uchar2); uchar2 __ovld __cnfn shuffle(uchar2, uchar2); uchar2 __ovld __cnfn shuffle(uchar4, uchar2); uchar2 __ovld __cnfn shuffle(uchar8, uchar2); uchar2 __ovld __cnfn shuffle(uchar16, uchar2); short2 __ovld __cnfn shuffle(short2, ushort2); short2 __ovld __cnfn shuffle(short4, ushort2); short2 __ovld __cnfn shuffle(short8, ushort2); short2 __ovld __cnfn shuffle(short16, ushort2); ushort2 __ovld __cnfn shuffle(ushort2, ushort2); ushort2 __ovld __cnfn shuffle(ushort4, ushort2); ushort2 __ovld __cnfn shuffle(ushort8, ushort2); ushort2 __ovld __cnfn shuffle(ushort16, ushort2); int2 __ovld __cnfn shuffle(int2, uint2); int2 __ovld __cnfn shuffle(int4, uint2); int2 __ovld __cnfn shuffle(int8, uint2); int2 __ovld __cnfn shuffle(int16, uint2); uint2 __ovld __cnfn shuffle(uint2, uint2); uint2 __ovld __cnfn shuffle(uint4, uint2); uint2 __ovld __cnfn shuffle(uint8, uint2); uint2 __ovld __cnfn shuffle(uint16, uint2); long2 __ovld __cnfn shuffle(long2, ulong2); long2 __ovld __cnfn shuffle(long4, ulong2); long2 __ovld __cnfn shuffle(long8, ulong2); long2 __ovld __cnfn shuffle(long16, ulong2); ulong2 __ovld __cnfn shuffle(ulong2, ulong2); ulong2 __ovld __cnfn shuffle(ulong4, ulong2); ulong2 __ovld __cnfn shuffle(ulong8, ulong2); ulong2 __ovld __cnfn shuffle(ulong16, ulong2); float2 __ovld __cnfn shuffle(float2, uint2); float2 __ovld __cnfn shuffle(float4, uint2); float2 __ovld __cnfn 
shuffle(float8, uint2); float2 __ovld __cnfn shuffle(float16, uint2); char4 __ovld __cnfn shuffle(char2, uchar4); char4 __ovld __cnfn shuffle(char4, uchar4); char4 __ovld __cnfn shuffle(char8, uchar4); char4 __ovld __cnfn shuffle(char16, uchar4); uchar4 __ovld __cnfn shuffle(uchar2, uchar4); uchar4 __ovld __cnfn shuffle(uchar4, uchar4); uchar4 __ovld __cnfn shuffle(uchar8, uchar4); uchar4 __ovld __cnfn shuffle(uchar16, uchar4); short4 __ovld __cnfn shuffle(short2, ushort4); short4 __ovld __cnfn shuffle(short4, ushort4); short4 __ovld __cnfn shuffle(short8, ushort4); short4 __ovld __cnfn shuffle(short16, ushort4); ushort4 __ovld __cnfn shuffle(ushort2, ushort4); ushort4 __ovld __cnfn shuffle(ushort4, ushort4); ushort4 __ovld __cnfn shuffle(ushort8, ushort4); ushort4 __ovld __cnfn shuffle(ushort16, ushort4); int4 __ovld __cnfn shuffle(int2, uint4); int4 __ovld __cnfn shuffle(int4, uint4); int4 __ovld __cnfn shuffle(int8, uint4); int4 __ovld __cnfn shuffle(int16, uint4); uint4 __ovld __cnfn shuffle(uint2, uint4); uint4 __ovld __cnfn shuffle(uint4, uint4); uint4 __ovld __cnfn shuffle(uint8, uint4); uint4 __ovld __cnfn shuffle(uint16, uint4); long4 __ovld __cnfn shuffle(long2, ulong4); long4 __ovld __cnfn shuffle(long4, ulong4); long4 __ovld __cnfn shuffle(long8, ulong4); long4 __ovld __cnfn shuffle(long16, ulong4); ulong4 __ovld __cnfn shuffle(ulong2, ulong4); ulong4 __ovld __cnfn shuffle(ulong4, ulong4); ulong4 __ovld __cnfn shuffle(ulong8, ulong4); ulong4 __ovld __cnfn shuffle(ulong16, ulong4); float4 __ovld __cnfn shuffle(float2, uint4); float4 __ovld __cnfn shuffle(float4, uint4); float4 __ovld __cnfn shuffle(float8, uint4); float4 __ovld __cnfn shuffle(float16, uint4); char8 __ovld __cnfn shuffle(char2, uchar8); char8 __ovld __cnfn shuffle(char4, uchar8); char8 __ovld __cnfn shuffle(char8, uchar8); char8 __ovld __cnfn shuffle(char16, uchar8); uchar8 __ovld __cnfn shuffle(uchar2, uchar8); uchar8 __ovld __cnfn shuffle(uchar4, uchar8); uchar8 __ovld __cnfn 
shuffle(uchar8, uchar8); uchar8 __ovld __cnfn shuffle(uchar16, uchar8); short8 __ovld __cnfn shuffle(short2, ushort8); short8 __ovld __cnfn shuffle(short4, ushort8); short8 __ovld __cnfn shuffle(short8, ushort8); short8 __ovld __cnfn shuffle(short16, ushort8); ushort8 __ovld __cnfn shuffle(ushort2, ushort8); ushort8 __ovld __cnfn shuffle(ushort4, ushort8); ushort8 __ovld __cnfn shuffle(ushort8, ushort8); ushort8 __ovld __cnfn shuffle(ushort16, ushort8); int8 __ovld __cnfn shuffle(int2, uint8); int8 __ovld __cnfn shuffle(int4, uint8); int8 __ovld __cnfn shuffle(int8, uint8); int8 __ovld __cnfn shuffle(int16, uint8); uint8 __ovld __cnfn shuffle(uint2, uint8); uint8 __ovld __cnfn shuffle(uint4, uint8); uint8 __ovld __cnfn shuffle(uint8, uint8); uint8 __ovld __cnfn shuffle(uint16, uint8); long8 __ovld __cnfn shuffle(long2, ulong8); long8 __ovld __cnfn shuffle(long4, ulong8); long8 __ovld __cnfn shuffle(long8, ulong8); long8 __ovld __cnfn shuffle(long16, ulong8); ulong8 __ovld __cnfn shuffle(ulong2, ulong8); ulong8 __ovld __cnfn shuffle(ulong4, ulong8); ulong8 __ovld __cnfn shuffle(ulong8, ulong8); ulong8 __ovld __cnfn shuffle(ulong16, ulong8); float8 __ovld __cnfn shuffle(float2, uint8); float8 __ovld __cnfn shuffle(float4, uint8); float8 __ovld __cnfn shuffle(float8, uint8); float8 __ovld __cnfn shuffle(float16, uint8); char16 __ovld __cnfn shuffle(char2, uchar16); char16 __ovld __cnfn shuffle(char4, uchar16); char16 __ovld __cnfn shuffle(char8, uchar16); char16 __ovld __cnfn shuffle(char16, uchar16); uchar16 __ovld __cnfn shuffle(uchar2, uchar16); uchar16 __ovld __cnfn shuffle(uchar4, uchar16); uchar16 __ovld __cnfn shuffle(uchar8, uchar16); uchar16 __ovld __cnfn shuffle(uchar16, uchar16); short16 __ovld __cnfn shuffle(short2, ushort16); short16 __ovld __cnfn shuffle(short4, ushort16); short16 __ovld __cnfn shuffle(short8, ushort16); short16 __ovld __cnfn shuffle(short16, ushort16); ushort16 __ovld __cnfn shuffle(ushort2, ushort16); ushort16 __ovld __cnfn 
shuffle(ushort4, ushort16); ushort16 __ovld __cnfn shuffle(ushort8, ushort16); ushort16 __ovld __cnfn shuffle(ushort16, ushort16); int16 __ovld __cnfn shuffle(int2, uint16); int16 __ovld __cnfn shuffle(int4, uint16); int16 __ovld __cnfn shuffle(int8, uint16); int16 __ovld __cnfn shuffle(int16, uint16); uint16 __ovld __cnfn shuffle(uint2, uint16); uint16 __ovld __cnfn shuffle(uint4, uint16); uint16 __ovld __cnfn shuffle(uint8, uint16); uint16 __ovld __cnfn shuffle(uint16, uint16); long16 __ovld __cnfn shuffle(long2, ulong16); long16 __ovld __cnfn shuffle(long4, ulong16); long16 __ovld __cnfn shuffle(long8, ulong16); long16 __ovld __cnfn shuffle(long16, ulong16); ulong16 __ovld __cnfn shuffle(ulong2, ulong16); ulong16 __ovld __cnfn shuffle(ulong4, ulong16); ulong16 __ovld __cnfn shuffle(ulong8, ulong16); ulong16 __ovld __cnfn shuffle(ulong16, ulong16); float16 __ovld __cnfn shuffle(float2, uint16); float16 __ovld __cnfn shuffle(float4, uint16); float16 __ovld __cnfn shuffle(float8, uint16); float16 __ovld __cnfn shuffle(float16, uint16); #ifdef cl_khr_fp64 double2 __ovld __cnfn shuffle(double2, ulong2); double2 __ovld __cnfn shuffle(double4, ulong2); double2 __ovld __cnfn shuffle(double8, ulong2); double2 __ovld __cnfn shuffle(double16, ulong2); double4 __ovld __cnfn shuffle(double2, ulong4); double4 __ovld __cnfn shuffle(double4, ulong4); double4 __ovld __cnfn shuffle(double8, ulong4); double4 __ovld __cnfn shuffle(double16, ulong4); double8 __ovld __cnfn shuffle(double2, ulong8); double8 __ovld __cnfn shuffle(double4, ulong8); double8 __ovld __cnfn shuffle(double8, ulong8); double8 __ovld __cnfn shuffle(double16, ulong8); double16 __ovld __cnfn shuffle(double2, ulong16); double16 __ovld __cnfn shuffle(double4, ulong16); double16 __ovld __cnfn shuffle(double8, ulong16); double16 __ovld __cnfn shuffle(double16, ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __cnfn shuffle(half2, ushort2); half2 __ovld __cnfn shuffle(half4, ushort2); half2 __ovld 
__cnfn shuffle(half8, ushort2); half2 __ovld __cnfn shuffle(half16, ushort2); half4 __ovld __cnfn shuffle(half2, ushort4); half4 __ovld __cnfn shuffle(half4, ushort4); half4 __ovld __cnfn shuffle(half8, ushort4); half4 __ovld __cnfn shuffle(half16, ushort4); half8 __ovld __cnfn shuffle(half2, ushort8); half8 __ovld __cnfn shuffle(half4, ushort8); half8 __ovld __cnfn shuffle(half8, ushort8); half8 __ovld __cnfn shuffle(half16, ushort8); half16 __ovld __cnfn shuffle(half2, ushort16); half16 __ovld __cnfn shuffle(half4, ushort16); half16 __ovld __cnfn shuffle(half8, ushort16); half16 __ovld __cnfn shuffle(half16, ushort16); #endif //cl_khr_fp16 char2 __ovld __cnfn shuffle2(char2, char2, uchar2); char2 __ovld __cnfn shuffle2(char4, char4, uchar2); char2 __ovld __cnfn shuffle2(char8, char8, uchar2); char2 __ovld __cnfn shuffle2(char16, char16, uchar2); uchar2 __ovld __cnfn shuffle2(uchar2, uchar2, uchar2); uchar2 __ovld __cnfn shuffle2(uchar4, uchar4, uchar2); uchar2 __ovld __cnfn shuffle2(uchar8, uchar8, uchar2); uchar2 __ovld __cnfn shuffle2(uchar16, uchar16, uchar2); short2 __ovld __cnfn shuffle2(short2, short2, ushort2); short2 __ovld __cnfn shuffle2(short4, short4, ushort2); short2 __ovld __cnfn shuffle2(short8, short8, ushort2); short2 __ovld __cnfn shuffle2(short16, short16, ushort2); ushort2 __ovld __cnfn shuffle2(ushort2, ushort2, ushort2); ushort2 __ovld __cnfn shuffle2(ushort4, ushort4, ushort2); ushort2 __ovld __cnfn shuffle2(ushort8, ushort8, ushort2); ushort2 __ovld __cnfn shuffle2(ushort16, ushort16, ushort2); int2 __ovld __cnfn shuffle2(int2, int2, uint2); int2 __ovld __cnfn shuffle2(int4, int4, uint2); int2 __ovld __cnfn shuffle2(int8, int8, uint2); int2 __ovld __cnfn shuffle2(int16, int16, uint2); uint2 __ovld __cnfn shuffle2(uint2, uint2, uint2); uint2 __ovld __cnfn shuffle2(uint4, uint4, uint2); uint2 __ovld __cnfn shuffle2(uint8, uint8, uint2); uint2 __ovld __cnfn shuffle2(uint16, uint16, uint2); long2 __ovld __cnfn shuffle2(long2, long2, ulong2); 
long2 __ovld __cnfn shuffle2(long4, long4, ulong2); long2 __ovld __cnfn shuffle2(long8, long8, ulong2); long2 __ovld __cnfn shuffle2(long16, long16, ulong2); ulong2 __ovld __cnfn shuffle2(ulong2, ulong2, ulong2); ulong2 __ovld __cnfn shuffle2(ulong4, ulong4, ulong2); ulong2 __ovld __cnfn shuffle2(ulong8, ulong8, ulong2); ulong2 __ovld __cnfn shuffle2(ulong16, ulong16, ulong2); float2 __ovld __cnfn shuffle2(float2, float2, uint2); float2 __ovld __cnfn shuffle2(float4, float4, uint2); float2 __ovld __cnfn shuffle2(float8, float8, uint2); float2 __ovld __cnfn shuffle2(float16, float16, uint2); char4 __ovld __cnfn shuffle2(char2, char2, uchar4); char4 __ovld __cnfn shuffle2(char4, char4, uchar4); char4 __ovld __cnfn shuffle2(char8, char8, uchar4); char4 __ovld __cnfn shuffle2(char16, char16, uchar4); uchar4 __ovld __cnfn shuffle2(uchar2, uchar2, uchar4); uchar4 __ovld __cnfn shuffle2(uchar4, uchar4, uchar4); uchar4 __ovld __cnfn shuffle2(uchar8, uchar8, uchar4); uchar4 __ovld __cnfn shuffle2(uchar16, uchar16, uchar4); short4 __ovld __cnfn shuffle2(short2, short2, ushort4); short4 __ovld __cnfn shuffle2(short4, short4, ushort4); short4 __ovld __cnfn shuffle2(short8, short8, ushort4); short4 __ovld __cnfn shuffle2(short16, short16, ushort4); ushort4 __ovld __cnfn shuffle2(ushort2, ushort2, ushort4); ushort4 __ovld __cnfn shuffle2(ushort4, ushort4, ushort4); ushort4 __ovld __cnfn shuffle2(ushort8, ushort8, ushort4); ushort4 __ovld __cnfn shuffle2(ushort16, ushort16, ushort4); int4 __ovld __cnfn shuffle2(int2, int2, uint4); int4 __ovld __cnfn shuffle2(int4, int4, uint4); int4 __ovld __cnfn shuffle2(int8, int8, uint4); int4 __ovld __cnfn shuffle2(int16, int16, uint4); uint4 __ovld __cnfn shuffle2(uint2, uint2, uint4); uint4 __ovld __cnfn shuffle2(uint4, uint4, uint4); uint4 __ovld __cnfn shuffle2(uint8, uint8, uint4); uint4 __ovld __cnfn shuffle2(uint16, uint16, uint4); long4 __ovld __cnfn shuffle2(long2, long2, ulong4); long4 __ovld __cnfn shuffle2(long4, long4, ulong4); 
long4 __ovld __cnfn shuffle2(long8, long8, ulong4); long4 __ovld __cnfn shuffle2(long16, long16, ulong4); ulong4 __ovld __cnfn shuffle2(ulong2, ulong2, ulong4); ulong4 __ovld __cnfn shuffle2(ulong4, ulong4, ulong4); ulong4 __ovld __cnfn shuffle2(ulong8, ulong8, ulong4); ulong4 __ovld __cnfn shuffle2(ulong16, ulong16, ulong4); float4 __ovld __cnfn shuffle2(float2, float2, uint4); float4 __ovld __cnfn shuffle2(float4, float4, uint4); float4 __ovld __cnfn shuffle2(float8, float8, uint4); float4 __ovld __cnfn shuffle2(float16, float16, uint4); char8 __ovld __cnfn shuffle2(char2, char2, uchar8); char8 __ovld __cnfn shuffle2(char4, char4, uchar8); char8 __ovld __cnfn shuffle2(char8, char8, uchar8); char8 __ovld __cnfn shuffle2(char16, char16, uchar8); uchar8 __ovld __cnfn shuffle2(uchar2, uchar2, uchar8); uchar8 __ovld __cnfn shuffle2(uchar4, uchar4, uchar8); uchar8 __ovld __cnfn shuffle2(uchar8, uchar8, uchar8); uchar8 __ovld __cnfn shuffle2(uchar16, uchar16, uchar8); short8 __ovld __cnfn shuffle2(short2, short2, ushort8); short8 __ovld __cnfn shuffle2(short4, short4, ushort8); short8 __ovld __cnfn shuffle2(short8, short8, ushort8); short8 __ovld __cnfn shuffle2(short16, short16, ushort8); ushort8 __ovld __cnfn shuffle2(ushort2, ushort2, ushort8); ushort8 __ovld __cnfn shuffle2(ushort4, ushort4, ushort8); ushort8 __ovld __cnfn shuffle2(ushort8, ushort8, ushort8); ushort8 __ovld __cnfn shuffle2(ushort16, ushort16, ushort8); int8 __ovld __cnfn shuffle2(int2, int2, uint8); int8 __ovld __cnfn shuffle2(int4, int4, uint8); int8 __ovld __cnfn shuffle2(int8, int8, uint8); int8 __ovld __cnfn shuffle2(int16, int16, uint8); uint8 __ovld __cnfn shuffle2(uint2, uint2, uint8); uint8 __ovld __cnfn shuffle2(uint4, uint4, uint8); uint8 __ovld __cnfn shuffle2(uint8, uint8, uint8); uint8 __ovld __cnfn shuffle2(uint16, uint16, uint8); long8 __ovld __cnfn shuffle2(long2, long2, ulong8); long8 __ovld __cnfn shuffle2(long4, long4, ulong8); long8 __ovld __cnfn shuffle2(long8, long8, ulong8); 
long8 __ovld __cnfn shuffle2(long16, long16, ulong8); ulong8 __ovld __cnfn shuffle2(ulong2, ulong2, ulong8); ulong8 __ovld __cnfn shuffle2(ulong4, ulong4, ulong8); ulong8 __ovld __cnfn shuffle2(ulong8, ulong8, ulong8); ulong8 __ovld __cnfn shuffle2(ulong16, ulong16, ulong8); float8 __ovld __cnfn shuffle2(float2, float2, uint8); float8 __ovld __cnfn shuffle2(float4, float4, uint8); float8 __ovld __cnfn shuffle2(float8, float8, uint8); float8 __ovld __cnfn shuffle2(float16, float16, uint8); char16 __ovld __cnfn shuffle2(char2, char2, uchar16); char16 __ovld __cnfn shuffle2(char4, char4, uchar16); char16 __ovld __cnfn shuffle2(char8, char8, uchar16); char16 __ovld __cnfn shuffle2(char16, char16, uchar16); uchar16 __ovld __cnfn shuffle2(uchar2, uchar2, uchar16); uchar16 __ovld __cnfn shuffle2(uchar4, uchar4, uchar16); uchar16 __ovld __cnfn shuffle2(uchar8, uchar8, uchar16); uchar16 __ovld __cnfn shuffle2(uchar16, uchar16, uchar16); short16 __ovld __cnfn shuffle2(short2, short2, ushort16); short16 __ovld __cnfn shuffle2(short4, short4, ushort16); short16 __ovld __cnfn shuffle2(short8, short8, ushort16); short16 __ovld __cnfn shuffle2(short16, short16, ushort16); ushort16 __ovld __cnfn shuffle2(ushort2, ushort2, ushort16); ushort16 __ovld __cnfn shuffle2(ushort4, ushort4, ushort16); ushort16 __ovld __cnfn shuffle2(ushort8, ushort8, ushort16); ushort16 __ovld __cnfn shuffle2(ushort16, ushort16, ushort16); int16 __ovld __cnfn shuffle2(int2, int2, uint16); int16 __ovld __cnfn shuffle2(int4, int4, uint16); int16 __ovld __cnfn shuffle2(int8, int8, uint16); int16 __ovld __cnfn shuffle2(int16, int16, uint16); uint16 __ovld __cnfn shuffle2(uint2, uint2, uint16); uint16 __ovld __cnfn shuffle2(uint4, uint4, uint16); uint16 __ovld __cnfn shuffle2(uint8, uint8, uint16); uint16 __ovld __cnfn shuffle2(uint16, uint16, uint16); long16 __ovld __cnfn shuffle2(long2, long2, ulong16); long16 __ovld __cnfn shuffle2(long4, long4, ulong16); long16 __ovld __cnfn shuffle2(long8, long8, ulong16); 
long16 __ovld __cnfn shuffle2(long16, long16, ulong16); ulong16 __ovld __cnfn shuffle2(ulong2, ulong2, ulong16); ulong16 __ovld __cnfn shuffle2(ulong4, ulong4, ulong16); ulong16 __ovld __cnfn shuffle2(ulong8, ulong8, ulong16); ulong16 __ovld __cnfn shuffle2(ulong16, ulong16, ulong16); float16 __ovld __cnfn shuffle2(float2, float2, uint16); float16 __ovld __cnfn shuffle2(float4, float4, uint16); float16 __ovld __cnfn shuffle2(float8, float8, uint16); float16 __ovld __cnfn shuffle2(float16, float16, uint16); #ifdef cl_khr_fp64 double2 __ovld __cnfn shuffle2(double2, double2, ulong2); double2 __ovld __cnfn shuffle2(double4, double4, ulong2); double2 __ovld __cnfn shuffle2(double8, double8, ulong2); double2 __ovld __cnfn shuffle2(double16, double16, ulong2); double4 __ovld __cnfn shuffle2(double2, double2, ulong4); double4 __ovld __cnfn shuffle2(double4, double4, ulong4); double4 __ovld __cnfn shuffle2(double8, double8, ulong4); double4 __ovld __cnfn shuffle2(double16, double16, ulong4); double8 __ovld __cnfn shuffle2(double2, double2, ulong8); double8 __ovld __cnfn shuffle2(double4, double4, ulong8); double8 __ovld __cnfn shuffle2(double8, double8, ulong8); double8 __ovld __cnfn shuffle2(double16, double16, ulong8); double16 __ovld __cnfn shuffle2(double2, double2, ulong16); double16 __ovld __cnfn shuffle2(double4, double4, ulong16); double16 __ovld __cnfn shuffle2(double8, double8, ulong16); double16 __ovld __cnfn shuffle2(double16, double16, ulong16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half2 __ovld __cnfn shuffle2(half2, half2, ushort2); half2 __ovld __cnfn shuffle2(half4, half4, ushort2); half2 __ovld __cnfn shuffle2(half8, half8, ushort2); half2 __ovld __cnfn shuffle2(half16, half16, ushort2); half4 __ovld __cnfn shuffle2(half2, half2, ushort4); half4 __ovld __cnfn shuffle2(half4, half4, ushort4); half4 __ovld __cnfn shuffle2(half8, half8, ushort4); half4 __ovld __cnfn shuffle2(half16, half16, ushort4); half8 __ovld __cnfn shuffle2(half2, half2, ushort8); 
half8 __ovld __cnfn shuffle2(half4, half4, ushort8); half8 __ovld __cnfn shuffle2(half8, half8, ushort8); half8 __ovld __cnfn shuffle2(half16, half16, ushort8); half16 __ovld __cnfn shuffle2(half2, half2, ushort16); half16 __ovld __cnfn shuffle2(half4, half4, ushort16); half16 __ovld __cnfn shuffle2(half8, half8, ushort16); half16 __ovld __cnfn shuffle2(half16, half16, ushort16); #endif //cl_khr_fp16 // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions #ifdef cl_khr_gl_msaa_sharing #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable #endif //cl_khr_gl_msaa_sharing /** * Use the coordinate (coord.xy) to do an element lookup in * the 2D image object specified by image. * * Use the coordinate (coord.x, coord.y, coord.z) to do * an element lookup in the 3D image object specified * by image. coord.w is ignored. * * Use the coordinate (coord.z) to index into the * 2D image array object specified by image_array * and (coord.x, coord.y) to do an element lookup in * the 2D image object specified by image. * * Use the coordinate (x) to do an element lookup in * the 1D image object specified by image. * * Use the coordinate (coord.y) to index into the * 1D image array object specified by image_array * and (coord.x) to do an element lookup in * the 1D image object specified by image. * * Use the coordinate (cood.xy) and sample to do an * element lookup in the 2D multi-sample image specified * by image. * * Use coord.xy and sample to do an element * lookup in the 2D multi-sample image layer * identified by index coord.z in the 2D multi-sample * image array specified by image. * * For mipmap images, use the mip-level specified by * the Level-of-Detail (lod) or use gradients for LOD * computation. * * read_imagef returns floating-point values in the * range [0.0 ... 1.0] for image objects created with * image_channel_data_type set to one of the predefined * packed formats or CL_UNORM_INT8, or * CL_UNORM_INT16. 
* * read_imagef returns floating-point values in the * range [-1.0 ... 1.0] for image objects created with * image_channel_data_type set to CL_SNORM_INT8, * or CL_SNORM_INT16. * * read_imagef returns floating-point values for image * objects created with image_channel_data_type set to * CL_HALF_FLOAT or CL_FLOAT. * * read_imagei and read_imageui return * unnormalized signed integer and unsigned integer * values respectively. Each channel will be stored in a * 32-bit integer. * * read_imagei can only be used with image objects * created with image_channel_data_type set to one of * the following values: * CL_SIGNED_INT8, * CL_SIGNED_INT16 and * CL_SIGNED_INT32. * If the image_channel_data_type is not one of the * above values, the values returned by read_imagei * are undefined. * * read_imageui can only be used with image objects * created with image_channel_data_type set to one of * the following values: * CL_UNSIGNED_INT8, * CL_UNSIGNED_INT16 and * CL_UNSIGNED_INT32. * If the image_channel_data_type is not one of the * above values, the values returned by read_imageui * are undefined. * * The read_image{i|ui} calls support a nearest filter * only. The filter_mode specified in sampler * must be set to CLK_FILTER_NEAREST; otherwise * the values returned are undefined. * The read_image{f|i|ui} calls that take * integer coordinates must use a sampler with * normalized coordinates set to * CLK_NORMALIZED_COORDS_FALSE and * addressing mode set to * CLK_ADDRESS_CLAMP_TO_EDGE, * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; * otherwise the values returned are undefined. * * Values returned by read_imagef for image objects * with image_channel_data_type values not specified * in the description above are undefined. 
*/ float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, int2); float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, float2); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, int2); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, float2); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, int2); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, float2); float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, int4); float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, float4); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, int4); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, float4); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, int4); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, float4); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, int4); float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, float4); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, int4); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, float4); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, int4); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, float4); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, int); float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, float); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, int); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, float); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, int); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, float); #if 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, int2); float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, float2); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, int2); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, float2); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, int2); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, float2); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, float2); float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, int2); float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, float4); float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, int4); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) float4 __ovld __purefn read_imagef(read_only image2d_msaa_t, int2, int); int4 __ovld __purefn read_imagei(read_only image2d_msaa_t, int2, int); uint4 __ovld __purefn read_imageui(read_only image2d_msaa_t, int2, int); float __ovld __purefn read_imagef(read_only image2d_msaa_depth_t, int2, int); float4 __ovld __purefn read_imagef(read_only image2d_array_msaa_t, int4, int); int4 __ovld __purefn read_imagei(read_only image2d_array_msaa_t, int4, int); uint4 __ovld __purefn read_imageui(read_only image2d_array_msaa_t, int4, int); float __ovld __purefn read_imagef(read_only image2d_array_msaa_depth_t, int4, int); #endif //cl_khr_gl_msaa_sharing // OpenCL Extension v2.0 s9.18 - Mipmaps #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, float, float); int4 __ovld __purefn read_imagei(read_only image1d_t, 
sampler_t, float, float); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, float, float); float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, float2, float); float4 __ovld __purefn read_imagef(read_only image2d_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, float2, float); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, float2, float); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, float4, float); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, float4, float); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, float4, float); float4 __ovld __purefn read_imagef(read_only image1d_t, sampler_t, float, float, float); int4 __ovld __purefn read_imagei(read_only image1d_t, sampler_t, float, float, float); uint4 __ovld __purefn read_imageui(read_only image1d_t, sampler_t, float, float, float); float4 __ovld __purefn read_imagef(read_only image1d_array_t, sampler_t, float2, float, float); int4 __ovld __purefn read_imagei(read_only image1d_array_t, sampler_t, float2, float, float); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, sampler_t, float2, float, float); float4 __ovld 
__purefn read_imagef(read_only image2d_t, sampler_t, float2, float2, float2); int4 __ovld __purefn read_imagei(read_only image2d_t, sampler_t, float2, float2, float2); uint4 __ovld __purefn read_imageui(read_only image2d_t, sampler_t, float2, float2, float2); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, sampler_t, float2, float2, float2); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image2d_array_t, sampler_t, float4, float2, float2); int4 __ovld __purefn read_imagei(read_only image2d_array_t, sampler_t, float4, float2, float2); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, sampler_t, float4, float2, float2); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_array_depth_t, sampler_t, float4, float2, float2); #endif // cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image3d_t, sampler_t, float4, float4, float4); int4 __ovld __purefn read_imagei(read_only image3d_t, sampler_t, float4, float4, float4); uint4 __ovld __purefn read_imageui(read_only image3d_t, sampler_t, float4, float4, float4); #endif //cl_khr_mipmap_image #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) /** * Sampler-less Image Access */ float4 __ovld __purefn read_imagef(read_only image1d_t, int); int4 __ovld __purefn read_imagei(read_only image1d_t, int); uint4 __ovld __purefn read_imageui(read_only image1d_t, int); float4 __ovld __purefn read_imagef(read_only image1d_buffer_t, int); int4 __ovld __purefn read_imagei(read_only image1d_buffer_t, int); uint4 __ovld __purefn read_imageui(read_only image1d_buffer_t, int); float4 __ovld __purefn read_imagef(read_only image1d_array_t, int2); int4 __ovld __purefn read_imagei(read_only image1d_array_t, int2); uint4 __ovld __purefn read_imageui(read_only image1d_array_t, int2); float4 __ovld __purefn 
read_imagef(read_only image2d_t, int2); int4 __ovld __purefn read_imagei(read_only image2d_t, int2); uint4 __ovld __purefn read_imageui(read_only image2d_t, int2); float4 __ovld __purefn read_imagef(read_only image2d_array_t, int4); int4 __ovld __purefn read_imagei(read_only image2d_array_t, int4); uint4 __ovld __purefn read_imageui(read_only image2d_array_t, int4); #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_only image2d_depth_t, int2); float __ovld __purefn read_imagef(read_only image2d_array_depth_t, int4); #endif //cl_khr_depth_images float4 __ovld __purefn read_imagef(read_only image3d_t, int4); int4 __ovld __purefn read_imagei(read_only image3d_t, int4); uint4 __ovld __purefn read_imageui(read_only image3d_t, int4); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) // Image read functions returning half4 type #ifdef cl_khr_fp16 half4 __ovld __purefn read_imageh(read_only image1d_t, sampler_t, int); half4 __ovld __purefn read_imageh(read_only image1d_t, sampler_t, float); half4 __ovld __purefn read_imageh(read_only image2d_t, sampler_t, int2); half4 __ovld __purefn read_imageh(read_only image2d_t, sampler_t, float2); half4 __ovld __purefn read_imageh(read_only image3d_t, sampler_t, int4); half4 __ovld __purefn read_imageh(read_only image3d_t, sampler_t, float4); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) half4 __ovld __purefn read_imageh(read_only image1d_array_t, sampler_t, int2); half4 __ovld __purefn read_imageh(read_only image1d_array_t, sampler_t, float2); half4 __ovld __purefn read_imageh(read_only image2d_array_t, sampler_t, int4); half4 __ovld __purefn read_imageh(read_only image2d_array_t, sampler_t, float4); /** * Sampler-less Image Access */ half4 __ovld __purefn read_imageh(read_only image1d_t, int); half4 __ovld __purefn read_imageh(read_only image2d_t, int2); half4 __ovld __purefn read_imageh(read_only image3d_t, int4); half4 __ovld __purefn 
read_imageh(read_only image1d_array_t, int2); half4 __ovld __purefn read_imageh(read_only image2d_array_t, int4); half4 __ovld __purefn read_imageh(read_only image1d_buffer_t, int); #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #endif //cl_khr_fp16 // Image read functions for read_write images #if defined(__opencl_c_read_write_images) float4 __ovld __purefn read_imagef(read_write image1d_t, int); int4 __ovld __purefn read_imagei(read_write image1d_t, int); uint4 __ovld __purefn read_imageui(read_write image1d_t, int); float4 __ovld __purefn read_imagef(read_write image1d_buffer_t, int); int4 __ovld __purefn read_imagei(read_write image1d_buffer_t, int); uint4 __ovld __purefn read_imageui(read_write image1d_buffer_t, int); float4 __ovld __purefn read_imagef(read_write image1d_array_t, int2); int4 __ovld __purefn read_imagei(read_write image1d_array_t, int2); uint4 __ovld __purefn read_imageui(read_write image1d_array_t, int2); float4 __ovld __purefn read_imagef(read_write image2d_t, int2); int4 __ovld __purefn read_imagei(read_write image2d_t, int2); uint4 __ovld __purefn read_imageui(read_write image2d_t, int2); float4 __ovld __purefn read_imagef(read_write image2d_array_t, int4); int4 __ovld __purefn read_imagei(read_write image2d_array_t, int4); uint4 __ovld __purefn read_imageui(read_write image2d_array_t, int4); #ifdef cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image3d_t, int4); int4 __ovld __purefn read_imagei(read_write image3d_t, int4); uint4 __ovld __purefn read_imageui(read_write image3d_t, int4); #endif // cl_khr_3d_image_writes #ifdef cl_khr_depth_images float __ovld __purefn read_imagef(read_write image2d_depth_t, int2); float __ovld __purefn read_imagef(read_write image2d_array_depth_t, int4); #endif //cl_khr_depth_images #if cl_khr_gl_msaa_sharing float4 __ovld __purefn read_imagef(read_write image2d_msaa_t, int2, int); int4 __ovld __purefn read_imagei(read_write image2d_msaa_t, int2, 
int); uint4 __ovld __purefn read_imageui(read_write image2d_msaa_t, int2, int); float4 __ovld __purefn read_imagef(read_write image2d_array_msaa_t, int4, int); int4 __ovld __purefn read_imagei(read_write image2d_array_msaa_t, int4, int); uint4 __ovld __purefn read_imageui(read_write image2d_array_msaa_t, int4, int); float __ovld __purefn read_imagef(read_write image2d_msaa_depth_t, int2, int); float __ovld __purefn read_imagef(read_write image2d_array_msaa_depth_t, int4, int); #endif //cl_khr_gl_msaa_sharing #ifdef cl_khr_mipmap_image float4 __ovld __purefn read_imagef(read_write image1d_t, sampler_t, float, float); int4 __ovld __purefn read_imagei(read_write image1d_t, sampler_t, float, float); uint4 __ovld __purefn read_imageui(read_write image1d_t, sampler_t, float, float); float4 __ovld __purefn read_imagef(read_write image1d_array_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_write image1d_array_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_write image1d_array_t, sampler_t, float2, float); float4 __ovld __purefn read_imagef(read_write image2d_t, sampler_t, float2, float); int4 __ovld __purefn read_imagei(read_write image2d_t, sampler_t, float2, float); uint4 __ovld __purefn read_imageui(read_write image2d_t, sampler_t, float2, float); float __ovld __purefn read_imagef(read_write image2d_depth_t, sampler_t, float2, float); float4 __ovld __purefn read_imagef(read_write image2d_array_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_write image2d_array_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_write image2d_array_t, sampler_t, float4, float); float __ovld __purefn read_imagef(read_write image2d_array_depth_t, sampler_t, float4, float); #ifdef cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image3d_t, sampler_t, float4, float); int4 __ovld __purefn read_imagei(read_write image3d_t, sampler_t, float4, float); uint4 __ovld __purefn read_imageui(read_write 
image3d_t, sampler_t, float4, float); #endif // cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image1d_t, sampler_t, float, float, float); int4 __ovld __purefn read_imagei(read_write image1d_t, sampler_t, float, float, float); uint4 __ovld __purefn read_imageui(read_write image1d_t, sampler_t, float, float, float); float4 __ovld __purefn read_imagef(read_write image1d_array_t, sampler_t, float2, float, float); int4 __ovld __purefn read_imagei(read_write image1d_array_t, sampler_t, float2, float, float); uint4 __ovld __purefn read_imageui(read_write image1d_array_t, sampler_t, float2, float, float); float4 __ovld __purefn read_imagef(read_write image2d_t, sampler_t, float2, float2, float2); int4 __ovld __purefn read_imagei(read_write image2d_t, sampler_t, float2, float2, float2); uint4 __ovld __purefn read_imageui(read_write image2d_t, sampler_t, float2, float2, float2); float __ovld __purefn read_imagef(read_write image2d_depth_t, sampler_t, float2, float2, float2); float4 __ovld __purefn read_imagef(read_write image2d_array_t, sampler_t, float4, float2, float2); int4 __ovld __purefn read_imagei(read_write image2d_array_t, sampler_t, float4, float2, float2); uint4 __ovld __purefn read_imageui(read_write image2d_array_t, sampler_t, float4, float2, float2); float __ovld __purefn read_imagef(read_write image2d_array_depth_t, sampler_t, float4, float2, float2); #ifdef cl_khr_3d_image_writes float4 __ovld __purefn read_imagef(read_write image3d_t, sampler_t, float4, float4, float4); int4 __ovld __purefn read_imagei(read_write image3d_t, sampler_t, float4, float4, float4); uint4 __ovld __purefn read_imageui(read_write image3d_t, sampler_t, float4, float4, float4); #endif // cl_khr_3d_image_writes #endif //cl_khr_mipmap_image // Image read functions returning half4 type #ifdef cl_khr_fp16 half4 __ovld __purefn read_imageh(read_write image1d_t, int); half4 __ovld __purefn read_imageh(read_write image2d_t, int2); #ifdef cl_khr_3d_image_writes half4 
__ovld __purefn read_imageh(read_write image3d_t, int4); #endif // cl_khr_3d_image_writes half4 __ovld __purefn read_imageh(read_write image1d_array_t, int2); half4 __ovld __purefn read_imageh(read_write image2d_array_t, int4); half4 __ovld __purefn read_imageh(read_write image1d_buffer_t, int); #endif //cl_khr_fp16 #endif //defined(__opencl_c_read_write_images) /** * Write color value to location specified by coordinate * (coord.x, coord.y) in the 2D image object specified by image. * (coord.x, coord.y) are considered to be unnormalized coordinates * and must be in the range 0 ... image width - 1, and 0 * ... image height - 1. * Write color value to location specified by coordinate * (coord.x, coord.y) in the 2D image object specified by index * (coord.z) of the 2D image array object image_array. * (coord.x, coord.y) are considered to be unnormalized * coordinates and must be in the range 0 ... image width * - 1. * * Write color value to location specified by coordinate * (coord) in the 1D image (buffer) object specified by image. * coord is considered to be unnormalized coordinates * and must be in the range 0 ... image width - 1. * * Write color value to location specified by coordinate * (coord.x) in the 1D image object specified by index * (coord.y) of the 1D image array object image_array. * x is considered to be unnormalized coordinates * and must be in the range 0 ... image width - 1. * * Write color value to location specified by coordinate * (coord.x, coord.y, coord.z) in the 3D image object specified by image. * coord.x & coord.y are considered to be unnormalized coordinates * and must be in the range 0 ... image width - 1, and 0 * ... image height - 1. * * For mipmap images, use mip-level specified by lod. * * Appropriate data format conversion to the specified * image format is done before writing the color value. 
* * write_imagef can only be used with image objects * created with image_channel_data_type set to one of * the pre-defined packed formats or set to * CL_SNORM_INT8, CL_UNORM_INT8, * CL_SNORM_INT16, CL_UNORM_INT16, * CL_HALF_FLOAT or CL_FLOAT. Appropriate data * format conversion will be done to convert channel * data from a floating-point value to actual data format * in which the channels are stored. * * write_imagei can only be used with image objects * created with image_channel_data_type set to one of * the following values: * CL_SIGNED_INT8, * CL_SIGNED_INT16 and * CL_SIGNED_INT32. * * write_imageui can only be used with image objects * created with image_channel_data_type set to one of * the following values: * CL_UNSIGNED_INT8, * CL_UNSIGNED_INT16 and * CL_UNSIGNED_INT32. * * The behavior of write_imagef, write_imagei and * write_imageui for image objects created with * image_channel_data_type values not specified in * the description above or with (x, y) coordinate * values that are not in the range (0 ... image width -1, * 0 ... image height - 1), respectively, is undefined. 
*/ void __ovld write_imagef(write_only image2d_t, int2, float4); void __ovld write_imagei(write_only image2d_t, int2, int4); void __ovld write_imageui(write_only image2d_t, int2, uint4); void __ovld write_imagef(write_only image2d_array_t, int4, float4); void __ovld write_imagei(write_only image2d_array_t, int4, int4); void __ovld write_imageui(write_only image2d_array_t, int4, uint4); void __ovld write_imagef(write_only image1d_t, int, float4); void __ovld write_imagei(write_only image1d_t, int, int4); void __ovld write_imageui(write_only image1d_t, int, uint4); void __ovld write_imagef(write_only image1d_buffer_t, int, float4); void __ovld write_imagei(write_only image1d_buffer_t, int, int4); void __ovld write_imageui(write_only image1d_buffer_t, int, uint4); void __ovld write_imagef(write_only image1d_array_t, int2, float4); void __ovld write_imagei(write_only image1d_array_t, int2, int4); void __ovld write_imageui(write_only image1d_array_t, int2, uint4); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(write_only image3d_t, int4, float4); void __ovld write_imagei(write_only image3d_t, int4, int4); void __ovld write_imageui(write_only image3d_t, int4, uint4); #endif #ifdef cl_khr_depth_images void __ovld write_imagef(write_only image2d_depth_t, int2, float); void __ovld write_imagef(write_only image2d_array_depth_t, int4, float); #endif //cl_khr_depth_images // OpenCL Extension v2.0 s9.18 - Mipmaps #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #if defined(cl_khr_mipmap_image_writes) void __ovld write_imagef(write_only image1d_t, int, int, float4); void __ovld write_imagei(write_only image1d_t, int, int, int4); void __ovld write_imageui(write_only image1d_t, int, int, uint4); void __ovld write_imagef(write_only image1d_array_t, int2, int, float4); void __ovld write_imagei(write_only image1d_array_t, int2, int, int4); void __ovld write_imageui(write_only image1d_array_t, int2, int, uint4); void __ovld 
write_imagef(write_only image2d_t, int2, int, float4); void __ovld write_imagei(write_only image2d_t, int2, int, int4); void __ovld write_imageui(write_only image2d_t, int2, int, uint4); void __ovld write_imagef(write_only image2d_array_t, int4, int, float4); void __ovld write_imagei(write_only image2d_array_t, int4, int, int4); void __ovld write_imageui(write_only image2d_array_t, int4, int, uint4); void __ovld write_imagef(write_only image2d_depth_t, int2, int, float); void __ovld write_imagef(write_only image2d_array_depth_t, int4, int, float); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(write_only image3d_t, int4, int, float4); void __ovld write_imagei(write_only image3d_t, int4, int, int4); void __ovld write_imageui(write_only image3d_t, int4, int, uint4); #endif //cl_khr_3d_image_writes #endif //defined(cl_khr_mipmap_image_writes) #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // Image write functions for half4 type #ifdef cl_khr_fp16 void __ovld write_imageh(write_only image1d_t, int, half4); void __ovld write_imageh(write_only image2d_t, int2, half4); #ifdef cl_khr_3d_image_writes void __ovld write_imageh(write_only image3d_t, int4, half4); #endif void __ovld write_imageh(write_only image1d_array_t, int2, half4); void __ovld write_imageh(write_only image2d_array_t, int4, half4); void __ovld write_imageh(write_only image1d_buffer_t, int, half4); #endif //cl_khr_fp16 // Image write functions for read_write images #if defined(__opencl_c_read_write_images) void __ovld write_imagef(read_write image2d_t, int2, float4); void __ovld write_imagei(read_write image2d_t, int2, int4); void __ovld write_imageui(read_write image2d_t, int2, uint4); void __ovld write_imagef(read_write image2d_array_t, int4, float4); void __ovld write_imagei(read_write image2d_array_t, int4, int4); void __ovld write_imageui(read_write image2d_array_t, int4, uint4); void __ovld write_imagef(read_write image1d_t, int, float4); void __ovld 
write_imagei(read_write image1d_t, int, int4); void __ovld write_imageui(read_write image1d_t, int, uint4); void __ovld write_imagef(read_write image1d_buffer_t, int, float4); void __ovld write_imagei(read_write image1d_buffer_t, int, int4); void __ovld write_imageui(read_write image1d_buffer_t, int, uint4); void __ovld write_imagef(read_write image1d_array_t, int2, float4); void __ovld write_imagei(read_write image1d_array_t, int2, int4); void __ovld write_imageui(read_write image1d_array_t, int2, uint4); #ifdef cl_khr_3d_image_writes void __ovld write_imagef(read_write image3d_t, int4, float4); void __ovld write_imagei(read_write image3d_t, int4, int4); void __ovld write_imageui(read_write image3d_t, int4, uint4); #endif #ifdef cl_khr_depth_images void __ovld write_imagef(read_write image2d_depth_t, int2, float); void __ovld write_imagef(read_write image2d_array_depth_t, int4, float); #endif //cl_khr_depth_images #if defined(cl_khr_mipmap_image_writes) void __ovld write_imagef(read_write image1d_t, int, int, float4); void __ovld write_imagei(read_write image1d_t, int, int, int4); void __ovld write_imageui(read_write image1d_t, int, int, uint4); void __ovld write_imagef(read_write image1d_array_t, int2, int, float4); void __ovld write_imagei(read_write image1d_array_t, int2, int, int4); void __ovld write_imageui(read_write image1d_array_t, int2, int, uint4); void __ovld write_imagef(read_write image2d_t, int2, int, float4); void __ovld write_imagei(read_write image2d_t, int2, int, int4); void __ovld write_imageui(read_write image2d_t, int2, int, uint4); void __ovld write_imagef(read_write image2d_array_t, int4, int, float4); void __ovld write_imagei(read_write image2d_array_t, int4, int, int4); void __ovld write_imageui(read_write image2d_array_t, int4, int, uint4); void __ovld write_imagef(read_write image2d_depth_t, int2, int, float); void __ovld write_imagef(read_write image2d_array_depth_t, int4, int, float); #ifdef cl_khr_3d_image_writes void __ovld 
write_imagef(read_write image3d_t, int4, int, float4); void __ovld write_imagei(read_write image3d_t, int4, int, int4); void __ovld write_imageui(read_write image3d_t, int4, int, uint4); #endif //cl_khr_3d_image_writes #endif //cl_khr_mipmap_image_writes // Image write functions for half4 type #ifdef cl_khr_fp16 void __ovld write_imageh(read_write image1d_t, int, half4); void __ovld write_imageh(read_write image2d_t, int2, half4); #ifdef cl_khr_3d_image_writes void __ovld write_imageh(read_write image3d_t, int4, half4); #endif void __ovld write_imageh(read_write image1d_array_t, int2, half4); void __ovld write_imageh(read_write image2d_array_t, int4, half4); void __ovld write_imageh(read_write image1d_buffer_t, int, half4); #endif //cl_khr_fp16 #endif //defined(__opencl_c_read_write_images) // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have // access qualifier, which by default assume read_only access qualifier. Image query builtin // functions with write_only image argument should also be declared. /** * Return the image width in pixels. 
* */ int __ovld __cnfn get_image_width(read_only image1d_t); int __ovld __cnfn get_image_width(read_only image1d_buffer_t); int __ovld __cnfn get_image_width(read_only image2d_t); int __ovld __cnfn get_image_width(read_only image3d_t); int __ovld __cnfn get_image_width(read_only image1d_array_t); int __ovld __cnfn get_image_width(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_width(read_only image2d_depth_t); int __ovld __cnfn get_image_width(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_width(read_only image2d_msaa_t); int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_width(write_only image1d_t); int __ovld __cnfn get_image_width(write_only image1d_buffer_t); int __ovld __cnfn get_image_width(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_width(write_only image3d_t); #endif int __ovld __cnfn get_image_width(write_only image1d_array_t); int __ovld __cnfn get_image_width(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_width(write_only image2d_depth_t); int __ovld __cnfn get_image_width(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_width(write_only image2d_msaa_t); int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_width(read_write image1d_t); int __ovld __cnfn get_image_width(read_write image1d_buffer_t); int __ovld __cnfn 
get_image_width(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_width(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_width(read_write image1d_array_t); int __ovld __cnfn get_image_width(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_width(read_write image2d_depth_t); int __ovld __cnfn get_image_width(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_width(read_write image2d_msaa_t); int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the image height in pixels. */ int __ovld __cnfn get_image_height(read_only image2d_t); int __ovld __cnfn get_image_height(read_only image3d_t); int __ovld __cnfn get_image_height(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_height(read_only image2d_depth_t); int __ovld __cnfn get_image_height(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_height(read_only image2d_msaa_t); int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_height(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_height(write_only image3d_t); #endif int __ovld __cnfn get_image_height(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_height(write_only image2d_depth_t); int __ovld __cnfn get_image_height(write_only image2d_array_depth_t); #endif 
//cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_height(write_only image2d_msaa_t); int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_height(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_height(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_height(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_height(read_write image2d_depth_t); int __ovld __cnfn get_image_height(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_height(read_write image2d_msaa_t); int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the image depth in pixels. */ int __ovld __cnfn get_image_depth(read_only image3d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_depth(write_only image3d_t); #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_depth(read_write image3d_t); #endif //defined(__opencl_c_read_write_images) #endif // cl_khr_3d_image_writes // OpenCL Extension v2.0 s9.18 - Mipmaps #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image /** * Return the image miplevels. 
*/ int __ovld get_image_num_mip_levels(read_only image1d_t); int __ovld get_image_num_mip_levels(read_only image2d_t); int __ovld get_image_num_mip_levels(read_only image3d_t); int __ovld get_image_num_mip_levels(write_only image1d_t); int __ovld get_image_num_mip_levels(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld get_image_num_mip_levels(write_only image3d_t); #endif #if defined(__opencl_c_read_write_images) int __ovld get_image_num_mip_levels(read_write image1d_t); int __ovld get_image_num_mip_levels(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld get_image_num_mip_levels(read_write image3d_t); #endif // cl_khr_3d_image_writes #endif //defined(__opencl_c_read_write_images) int __ovld get_image_num_mip_levels(read_only image1d_array_t); int __ovld get_image_num_mip_levels(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t); int __ovld get_image_num_mip_levels(read_only image2d_depth_t); #endif // cl_khr_depth_images int __ovld get_image_num_mip_levels(write_only image1d_array_t); int __ovld get_image_num_mip_levels(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t); int __ovld get_image_num_mip_levels(write_only image2d_depth_t); #endif // cl_khr_depth_images #if defined(__opencl_c_read_write_images) int __ovld get_image_num_mip_levels(read_write image1d_array_t); int __ovld get_image_num_mip_levels(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t); int __ovld get_image_num_mip_levels(read_write image2d_depth_t); #endif // cl_khr_depth_images #endif //defined(__opencl_c_read_write_images) #endif //cl_khr_mipmap_image #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the channel data type. 
Valid values are: * CLK_SNORM_INT8 * CLK_SNORM_INT16 * CLK_UNORM_INT8 * CLK_UNORM_INT16 * CLK_UNORM_SHORT_565 * CLK_UNORM_SHORT_555 * CLK_UNORM_SHORT_101010 * CLK_SIGNED_INT8 * CLK_SIGNED_INT16 * CLK_SIGNED_INT32 * CLK_UNSIGNED_INT8 * CLK_UNSIGNED_INT16 * CLK_UNSIGNED_INT32 * CLK_HALF_FLOAT * CLK_FLOAT */ int __ovld __cnfn get_image_channel_data_type(read_only image1d_t); int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_t); int __ovld __cnfn get_image_channel_data_type(read_only image3d_t); int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_channel_data_type(write_only image1d_t); int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_data_type(write_only image3d_t); #endif int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t); #endif 
//cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_channel_data_type(read_write image1d_t); int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_data_type(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the image channel order. 
Valid values are: * CLK_A * CLK_R * CLK_Rx * CLK_RG * CLK_RGx * CLK_RA * CLK_RGB * CLK_RGBx * CLK_RGBA * CLK_ARGB * CLK_BGRA * CLK_INTENSITY * CLK_LUMINANCE */ int __ovld __cnfn get_image_channel_order(read_only image1d_t); int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t); int __ovld __cnfn get_image_channel_order(read_only image2d_t); int __ovld __cnfn get_image_channel_order(read_only image3d_t); int __ovld __cnfn get_image_channel_order(read_only image1d_array_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t); int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int __ovld __cnfn get_image_channel_order(write_only image1d_t); int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t); int __ovld __cnfn get_image_channel_order(write_only image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_order(write_only image3d_t); #endif int __ovld __cnfn get_image_channel_order(write_only image1d_array_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t); int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t); int __ovld __cnfn 
get_image_channel_order(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_channel_order(read_write image1d_t); int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t); int __ovld __cnfn get_image_channel_order(read_write image2d_t); #ifdef cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_order(read_write image3d_t); #endif // cl_khr_3d_image_writes int __ovld __cnfn get_image_channel_order(read_write image1d_array_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_t); #ifdef cl_khr_depth_images int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t); int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the 2D image width and height as an int2 * type. The width is returned in the x component, and * the height in the y component. 
*/ int2 __ovld __cnfn get_image_dim(read_only image2d_t); int2 __ovld __cnfn get_image_dim(read_only image2d_array_t); #ifdef cl_khr_depth_images int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t); int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t); int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t); int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t); int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing int2 __ovld __cnfn get_image_dim(write_only image2d_t); int2 __ovld __cnfn get_image_dim(write_only image2d_array_t); #ifdef cl_khr_depth_images int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t); int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t); int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t); int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t); int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) int2 __ovld __cnfn get_image_dim(read_write image2d_t); int2 __ovld __cnfn get_image_dim(read_write image2d_array_t); #ifdef cl_khr_depth_images int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t); int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t); int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t); int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t); int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif 
//defined(__opencl_c_read_write_images) /** * Return the 3D image width, height, and depth as an * int4 type. The width is returned in the x * component, height in the y component, depth in the z * component and the w component is 0. */ int4 __ovld __cnfn get_image_dim(read_only image3d_t); #ifdef cl_khr_3d_image_writes int4 __ovld __cnfn get_image_dim(write_only image3d_t); #if defined(__opencl_c_read_write_images) int4 __ovld __cnfn get_image_dim(read_write image3d_t); #endif //defined(__opencl_c_read_write_images) #endif // cl_khr_3d_image_writes /** * Return the image array size. */ size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t); size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t); #ifdef cl_khr_depth_images size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t); size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t); size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t); #ifdef cl_khr_depth_images size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t); size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #if defined(__opencl_c_read_write_images) size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t); size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t); #ifdef cl_khr_depth_images size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t); #endif //cl_khr_depth_images #if defined(cl_khr_gl_msaa_sharing) size_t __ovld __cnfn get_image_array_size(read_write 
image2d_array_msaa_t); size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t); #endif //cl_khr_gl_msaa_sharing #endif //defined(__opencl_c_read_write_images) /** * Return the number of samples associated with image */ #if defined(cl_khr_gl_msaa_sharing) int __ovld __cnfn get_image_num_samples(read_only image2d_msaa_t); int __ovld __cnfn get_image_num_samples(read_only image2d_msaa_depth_t); int __ovld __cnfn get_image_num_samples(read_only image2d_array_msaa_t); int __ovld __cnfn get_image_num_samples(read_only image2d_array_msaa_depth_t); int __ovld __cnfn get_image_num_samples(write_only image2d_msaa_t); int __ovld __cnfn get_image_num_samples(write_only image2d_msaa_depth_t); int __ovld __cnfn get_image_num_samples(write_only image2d_array_msaa_t); int __ovld __cnfn get_image_num_samples(write_only image2d_array_msaa_depth_t); #if defined(__opencl_c_read_write_images) int __ovld __cnfn get_image_num_samples(read_write image2d_msaa_t); int __ovld __cnfn get_image_num_samples(read_write image2d_msaa_depth_t); int __ovld __cnfn get_image_num_samples(read_write image2d_array_msaa_t); int __ovld __cnfn get_image_num_samples(read_write image2d_array_msaa_depth_t); #endif //defined(__opencl_c_read_write_images) #endif // OpenCL v2.0 s6.13.15 - Work-group Functions #if defined(__opencl_c_work_group_collective_functions) int __ovld __conv work_group_all(int predicate); int __ovld __conv work_group_any(int predicate); #ifdef cl_khr_fp16 half __ovld __conv work_group_broadcast(half, size_t local_id); half __ovld __conv work_group_broadcast(half, size_t, size_t); half __ovld __conv work_group_broadcast(half, size_t, size_t, size_t); #endif int __ovld __conv work_group_broadcast(int, size_t local_id); int __ovld __conv work_group_broadcast(int, size_t, size_t); int __ovld __conv work_group_broadcast(int, size_t, size_t, size_t); uint __ovld __conv work_group_broadcast(uint, size_t local_id); uint __ovld __conv work_group_broadcast(uint, size_t, 
size_t); uint __ovld __conv work_group_broadcast(uint, size_t, size_t, size_t); long __ovld __conv work_group_broadcast(long, size_t local_id); long __ovld __conv work_group_broadcast(long, size_t, size_t); long __ovld __conv work_group_broadcast(long, size_t, size_t, size_t); ulong __ovld __conv work_group_broadcast(ulong, size_t local_id); ulong __ovld __conv work_group_broadcast(ulong, size_t, size_t); ulong __ovld __conv work_group_broadcast(ulong, size_t, size_t, size_t); float __ovld __conv work_group_broadcast(float, size_t local_id); float __ovld __conv work_group_broadcast(float, size_t, size_t); float __ovld __conv work_group_broadcast(float, size_t, size_t, size_t); #ifdef cl_khr_fp64 double __ovld __conv work_group_broadcast(double, size_t local_id); double __ovld __conv work_group_broadcast(double, size_t, size_t); double __ovld __conv work_group_broadcast(double, size_t, size_t, size_t); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 half __ovld __conv work_group_reduce_add(half); half __ovld __conv work_group_reduce_min(half); half __ovld __conv work_group_reduce_max(half); half __ovld __conv work_group_scan_exclusive_add(half); half __ovld __conv work_group_scan_exclusive_min(half); half __ovld __conv work_group_scan_exclusive_max(half); half __ovld __conv work_group_scan_inclusive_add(half); half __ovld __conv work_group_scan_inclusive_min(half); half __ovld __conv work_group_scan_inclusive_max(half); #endif int __ovld __conv work_group_reduce_add(int); int __ovld __conv work_group_reduce_min(int); int __ovld __conv work_group_reduce_max(int); int __ovld __conv work_group_scan_exclusive_add(int); int __ovld __conv work_group_scan_exclusive_min(int); int __ovld __conv work_group_scan_exclusive_max(int); int __ovld __conv work_group_scan_inclusive_add(int); int __ovld __conv work_group_scan_inclusive_min(int); int __ovld __conv work_group_scan_inclusive_max(int); uint __ovld __conv work_group_reduce_add(uint); uint __ovld __conv work_group_reduce_min(uint); 
uint __ovld __conv work_group_reduce_max(uint); uint __ovld __conv work_group_scan_exclusive_add(uint); uint __ovld __conv work_group_scan_exclusive_min(uint); uint __ovld __conv work_group_scan_exclusive_max(uint); uint __ovld __conv work_group_scan_inclusive_add(uint); uint __ovld __conv work_group_scan_inclusive_min(uint); uint __ovld __conv work_group_scan_inclusive_max(uint); long __ovld __conv work_group_reduce_add(long); long __ovld __conv work_group_reduce_min(long); long __ovld __conv work_group_reduce_max(long); long __ovld __conv work_group_scan_exclusive_add(long); long __ovld __conv work_group_scan_exclusive_min(long); long __ovld __conv work_group_scan_exclusive_max(long); long __ovld __conv work_group_scan_inclusive_add(long); long __ovld __conv work_group_scan_inclusive_min(long); long __ovld __conv work_group_scan_inclusive_max(long); ulong __ovld __conv work_group_reduce_add(ulong); ulong __ovld __conv work_group_reduce_min(ulong); ulong __ovld __conv work_group_reduce_max(ulong); ulong __ovld __conv work_group_scan_exclusive_add(ulong); ulong __ovld __conv work_group_scan_exclusive_min(ulong); ulong __ovld __conv work_group_scan_exclusive_max(ulong); ulong __ovld __conv work_group_scan_inclusive_add(ulong); ulong __ovld __conv work_group_scan_inclusive_min(ulong); ulong __ovld __conv work_group_scan_inclusive_max(ulong); float __ovld __conv work_group_reduce_add(float); float __ovld __conv work_group_reduce_min(float); float __ovld __conv work_group_reduce_max(float); float __ovld __conv work_group_scan_exclusive_add(float); float __ovld __conv work_group_scan_exclusive_min(float); float __ovld __conv work_group_scan_exclusive_max(float); float __ovld __conv work_group_scan_inclusive_add(float); float __ovld __conv work_group_scan_inclusive_min(float); float __ovld __conv work_group_scan_inclusive_max(float); #ifdef cl_khr_fp64 double __ovld __conv work_group_reduce_add(double); double __ovld __conv work_group_reduce_min(double); double __ovld 
__conv work_group_reduce_max(double); double __ovld __conv work_group_scan_exclusive_add(double); double __ovld __conv work_group_scan_exclusive_min(double); double __ovld __conv work_group_scan_exclusive_max(double); double __ovld __conv work_group_scan_inclusive_add(double); double __ovld __conv work_group_scan_inclusive_min(double); double __ovld __conv work_group_scan_inclusive_max(double); #endif //cl_khr_fp64 #endif //defined(__opencl_c_work_group_collective_functions) // OpenCL v2.0 s6.13.16 - Pipe Functions #if defined(__opencl_c_pipes) bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); #endif //defined(__opencl_c_pipes) // OpenCL v2.0 s6.13.17 - Enqueue Kernels #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef __opencl_c_device_enqueue ndrange_t __ovld ndrange_1D(size_t); ndrange_t __ovld ndrange_1D(size_t, size_t); ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); ndrange_t __ovld ndrange_2D(const size_t[2]); ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]); ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]); ndrange_t __ovld ndrange_3D(const size_t[3]); ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]); ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]); int __ovld enqueue_marker(queue_t, uint, const clk_event_t*, clk_event_t*); void __ovld retain_event(clk_event_t); void __ovld release_event(clk_event_t); clk_event_t __ovld create_user_event(void); void __ovld set_user_event_status(clk_event_t e, int state); bool __ovld is_valid_event (clk_event_t event); void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void*); queue_t __ovld get_default_queue(void); #endif //__opencl_c_device_enqueue #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL Extension v2.0 s9.17 - Sub-groups #if defined(__opencl_subgroup_builtins) // Shared Sub Group Functions uint __ovld 
get_sub_group_size(void); uint __ovld get_max_sub_group_size(void); uint __ovld get_num_sub_groups(void); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld get_enqueued_num_sub_groups(void); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld get_sub_group_id(void); uint __ovld get_sub_group_local_id(void); void __ovld __conv sub_group_barrier(cl_mem_fence_flags); #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv sub_group_barrier(cl_mem_fence_flags, memory_scope); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __conv sub_group_all(int predicate); int __ovld __conv sub_group_any(int predicate); int __ovld __conv sub_group_broadcast(int , uint sub_group_local_id); uint __ovld __conv sub_group_broadcast(uint , uint sub_group_local_id); long __ovld __conv sub_group_broadcast(long , uint sub_group_local_id); ulong __ovld __conv sub_group_broadcast(ulong, uint sub_group_local_id); float __ovld __conv sub_group_broadcast(float, uint sub_group_local_id); int __ovld __conv sub_group_reduce_add(int ); uint __ovld __conv sub_group_reduce_add(uint ); long __ovld __conv sub_group_reduce_add(long ); ulong __ovld __conv sub_group_reduce_add(ulong); float __ovld __conv sub_group_reduce_add(float); int __ovld __conv sub_group_reduce_min(int ); uint __ovld __conv sub_group_reduce_min(uint ); long __ovld __conv sub_group_reduce_min(long ); ulong __ovld __conv sub_group_reduce_min(ulong); float __ovld __conv sub_group_reduce_min(float); int __ovld __conv sub_group_reduce_max(int ); uint __ovld __conv sub_group_reduce_max(uint ); long __ovld __conv sub_group_reduce_max(long ); ulong __ovld __conv sub_group_reduce_max(ulong); float __ovld __conv sub_group_reduce_max(float); int __ovld __conv sub_group_scan_exclusive_add(int ); uint __ovld __conv sub_group_scan_exclusive_add(uint ); long __ovld __conv 
sub_group_scan_exclusive_add(long ); ulong __ovld __conv sub_group_scan_exclusive_add(ulong); float __ovld __conv sub_group_scan_exclusive_add(float); int __ovld __conv sub_group_scan_exclusive_min(int ); uint __ovld __conv sub_group_scan_exclusive_min(uint ); long __ovld __conv sub_group_scan_exclusive_min(long ); ulong __ovld __conv sub_group_scan_exclusive_min(ulong); float __ovld __conv sub_group_scan_exclusive_min(float); int __ovld __conv sub_group_scan_exclusive_max(int ); uint __ovld __conv sub_group_scan_exclusive_max(uint ); long __ovld __conv sub_group_scan_exclusive_max(long ); ulong __ovld __conv sub_group_scan_exclusive_max(ulong); float __ovld __conv sub_group_scan_exclusive_max(float); int __ovld __conv sub_group_scan_inclusive_add(int ); uint __ovld __conv sub_group_scan_inclusive_add(uint ); long __ovld __conv sub_group_scan_inclusive_add(long ); ulong __ovld __conv sub_group_scan_inclusive_add(ulong); float __ovld __conv sub_group_scan_inclusive_add(float); int __ovld __conv sub_group_scan_inclusive_min(int ); uint __ovld __conv sub_group_scan_inclusive_min(uint ); long __ovld __conv sub_group_scan_inclusive_min(long ); ulong __ovld __conv sub_group_scan_inclusive_min(ulong); float __ovld __conv sub_group_scan_inclusive_min(float); int __ovld __conv sub_group_scan_inclusive_max(int ); uint __ovld __conv sub_group_scan_inclusive_max(uint ); long __ovld __conv sub_group_scan_inclusive_max(long ); ulong __ovld __conv sub_group_scan_inclusive_max(ulong); float __ovld __conv sub_group_scan_inclusive_max(float); #ifdef cl_khr_fp16 half __ovld __conv sub_group_broadcast(half, uint sub_group_local_id); half __ovld __conv sub_group_reduce_add(half); half __ovld __conv sub_group_reduce_min(half); half __ovld __conv sub_group_reduce_max(half); half __ovld __conv sub_group_scan_exclusive_add(half); half __ovld __conv sub_group_scan_exclusive_min(half); half __ovld __conv sub_group_scan_exclusive_max(half); half __ovld __conv 
sub_group_scan_inclusive_add(half); half __ovld __conv sub_group_scan_inclusive_min(half); half __ovld __conv sub_group_scan_inclusive_max(half); #endif //cl_khr_fp16 #ifdef cl_khr_fp64 double __ovld __conv sub_group_broadcast(double, uint sub_group_local_id); double __ovld __conv sub_group_reduce_add(double); double __ovld __conv sub_group_reduce_min(double); double __ovld __conv sub_group_reduce_max(double); double __ovld __conv sub_group_scan_exclusive_add(double); double __ovld __conv sub_group_scan_exclusive_min(double); double __ovld __conv sub_group_scan_exclusive_max(double); double __ovld __conv sub_group_scan_inclusive_add(double); double __ovld __conv sub_group_scan_inclusive_min(double); double __ovld __conv sub_group_scan_inclusive_max(double); #endif //cl_khr_fp64 #endif // __opencl_subgroup_builtins #if defined(cl_khr_subgroup_extended_types) char __ovld __conv sub_group_broadcast( char value, uint index ); char2 __ovld __conv sub_group_broadcast( char2 value, uint index ); char3 __ovld __conv sub_group_broadcast( char3 value, uint index ); char4 __ovld __conv sub_group_broadcast( char4 value, uint index ); char8 __ovld __conv sub_group_broadcast( char8 value, uint index ); char16 __ovld __conv sub_group_broadcast( char16 value, uint index ); uchar __ovld __conv sub_group_broadcast( uchar value, uint index ); uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index ); uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index ); uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index ); uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index ); uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index ); short __ovld __conv sub_group_broadcast( short value, uint index ); short2 __ovld __conv sub_group_broadcast( short2 value, uint index ); short3 __ovld __conv sub_group_broadcast( short3 value, uint index ); short4 __ovld __conv sub_group_broadcast( short4 value, uint index ); short8 __ovld __conv 
sub_group_broadcast( short8 value, uint index ); short16 __ovld __conv sub_group_broadcast( short16 value, uint index ); ushort __ovld __conv sub_group_broadcast( ushort value, uint index ); ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index ); ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index ); ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index ); ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index ); ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index ); // scalar int broadcast is part of cl_khr_subgroups int2 __ovld __conv sub_group_broadcast( int2 value, uint index ); int3 __ovld __conv sub_group_broadcast( int3 value, uint index ); int4 __ovld __conv sub_group_broadcast( int4 value, uint index ); int8 __ovld __conv sub_group_broadcast( int8 value, uint index ); int16 __ovld __conv sub_group_broadcast( int16 value, uint index ); // scalar uint broadcast is part of cl_khr_subgroups uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index ); uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index ); uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index ); uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index ); uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index ); // scalar long broadcast is part of cl_khr_subgroups long2 __ovld __conv sub_group_broadcast( long2 value, uint index ); long3 __ovld __conv sub_group_broadcast( long3 value, uint index ); long4 __ovld __conv sub_group_broadcast( long4 value, uint index ); long8 __ovld __conv sub_group_broadcast( long8 value, uint index ); long16 __ovld __conv sub_group_broadcast( long16 value, uint index ); // scalar ulong broadcast is part of cl_khr_subgroups ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index ); ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index ); ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index ); ulong8 
__ovld __conv sub_group_broadcast( ulong8 value, uint index ); ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index ); // scalar float broadcast is part of cl_khr_subgroups float2 __ovld __conv sub_group_broadcast( float2 value, uint index ); float3 __ovld __conv sub_group_broadcast( float3 value, uint index ); float4 __ovld __conv sub_group_broadcast( float4 value, uint index ); float8 __ovld __conv sub_group_broadcast( float8 value, uint index ); float16 __ovld __conv sub_group_broadcast( float16 value, uint index ); char __ovld __conv sub_group_reduce_add( char value ); uchar __ovld __conv sub_group_reduce_add( uchar value ); short __ovld __conv sub_group_reduce_add( short value ); ushort __ovld __conv sub_group_reduce_add( ushort value ); char __ovld __conv sub_group_reduce_min( char value ); uchar __ovld __conv sub_group_reduce_min( uchar value ); short __ovld __conv sub_group_reduce_min( short value ); ushort __ovld __conv sub_group_reduce_min( ushort value ); char __ovld __conv sub_group_reduce_max( char value ); uchar __ovld __conv sub_group_reduce_max( uchar value ); short __ovld __conv sub_group_reduce_max( short value ); ushort __ovld __conv sub_group_reduce_max( ushort value ); char __ovld __conv sub_group_scan_inclusive_add( char value ); uchar __ovld __conv sub_group_scan_inclusive_add( uchar value ); short __ovld __conv sub_group_scan_inclusive_add( short value ); ushort __ovld __conv sub_group_scan_inclusive_add( ushort value ); char __ovld __conv sub_group_scan_inclusive_min( char value ); uchar __ovld __conv sub_group_scan_inclusive_min( uchar value ); short __ovld __conv sub_group_scan_inclusive_min( short value ); ushort __ovld __conv sub_group_scan_inclusive_min( ushort value ); char __ovld __conv sub_group_scan_inclusive_max( char value ); uchar __ovld __conv sub_group_scan_inclusive_max( uchar value ); short __ovld __conv sub_group_scan_inclusive_max( short value ); ushort __ovld __conv sub_group_scan_inclusive_max( ushort 
value ); char __ovld __conv sub_group_scan_exclusive_add( char value ); uchar __ovld __conv sub_group_scan_exclusive_add( uchar value ); short __ovld __conv sub_group_scan_exclusive_add( short value ); ushort __ovld __conv sub_group_scan_exclusive_add( ushort value ); char __ovld __conv sub_group_scan_exclusive_min( char value ); uchar __ovld __conv sub_group_scan_exclusive_min( uchar value ); short __ovld __conv sub_group_scan_exclusive_min( short value ); ushort __ovld __conv sub_group_scan_exclusive_min( ushort value ); char __ovld __conv sub_group_scan_exclusive_max( char value ); uchar __ovld __conv sub_group_scan_exclusive_max( uchar value ); short __ovld __conv sub_group_scan_exclusive_max( short value ); ushort __ovld __conv sub_group_scan_exclusive_max( ushort value ); #if defined(cl_khr_fp16) // scalar half broadcast is part of cl_khr_subgroups half2 __ovld __conv sub_group_broadcast( half2 value, uint index ); half3 __ovld __conv sub_group_broadcast( half3 value, uint index ); half4 __ovld __conv sub_group_broadcast( half4 value, uint index ); half8 __ovld __conv sub_group_broadcast( half8 value, uint index ); half16 __ovld __conv sub_group_broadcast( half16 value, uint index ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) // scalar double broadcast is part of cl_khr_subgroups double2 __ovld __conv sub_group_broadcast( double2 value, uint index ); double3 __ovld __conv sub_group_broadcast( double3 value, uint index ); double4 __ovld __conv sub_group_broadcast( double4 value, uint index ); double8 __ovld __conv sub_group_broadcast( double8 value, uint index ); double16 __ovld __conv sub_group_broadcast( double16 value, uint index ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_extended_types #if defined(cl_khr_subgroup_non_uniform_vote) int __ovld sub_group_elect(void); int __ovld sub_group_non_uniform_all( int predicate ); int __ovld sub_group_non_uniform_any( int predicate ); int __ovld sub_group_non_uniform_all_equal( char value ); int __ovld 
sub_group_non_uniform_all_equal( uchar value ); int __ovld sub_group_non_uniform_all_equal( short value ); int __ovld sub_group_non_uniform_all_equal( ushort value ); int __ovld sub_group_non_uniform_all_equal( int value ); int __ovld sub_group_non_uniform_all_equal( uint value ); int __ovld sub_group_non_uniform_all_equal( long value ); int __ovld sub_group_non_uniform_all_equal( ulong value ); int __ovld sub_group_non_uniform_all_equal( float value ); #if defined(cl_khr_fp16) int __ovld sub_group_non_uniform_all_equal( half value ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) int __ovld sub_group_non_uniform_all_equal( double value ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_non_uniform_vote #if defined(cl_khr_subgroup_ballot) char __ovld sub_group_non_uniform_broadcast( char value, uint index ); char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index ); char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index ); char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index ); char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index ); char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index ); uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index ); uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index ); uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index ); uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index ); uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index ); uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index ); short __ovld sub_group_non_uniform_broadcast( short value, uint index ); short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index ); short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index ); short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index ); short8 __ovld sub_group_non_uniform_broadcast( short8 value, 
uint index ); short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index ); ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index ); ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index ); ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index ); ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index ); ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index ); ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index ); int __ovld sub_group_non_uniform_broadcast( int value, uint index ); int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index ); int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index ); int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index ); int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index ); int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index ); uint __ovld sub_group_non_uniform_broadcast( uint value, uint index ); uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index ); uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index ); uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index ); uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index ); uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index ); long __ovld sub_group_non_uniform_broadcast( long value, uint index ); long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index ); long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index ); long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index ); long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index ); long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index ); ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index ); ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index 
); ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index ); ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index ); ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index ); ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index ); float __ovld sub_group_non_uniform_broadcast( float value, uint index ); float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index ); float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index ); float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index ); float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index ); float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index ); char __ovld sub_group_broadcast_first( char value ); uchar __ovld sub_group_broadcast_first( uchar value ); short __ovld sub_group_broadcast_first( short value ); ushort __ovld sub_group_broadcast_first( ushort value ); int __ovld sub_group_broadcast_first( int value ); uint __ovld sub_group_broadcast_first( uint value ); long __ovld sub_group_broadcast_first( long value ); ulong __ovld sub_group_broadcast_first( ulong value ); float __ovld sub_group_broadcast_first( float value ); uint4 __ovld sub_group_ballot( int predicate ); int __ovld __cnfn sub_group_inverse_ballot( uint4 value ); int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index ); uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value ); uint __ovld sub_group_ballot_inclusive_scan( uint4 value ); uint __ovld sub_group_ballot_exclusive_scan( uint4 value ); uint __ovld sub_group_ballot_find_lsb( uint4 value ); uint __ovld sub_group_ballot_find_msb( uint4 value ); uint4 __ovld __cnfn get_sub_group_eq_mask(void); uint4 __ovld __cnfn get_sub_group_ge_mask(void); uint4 __ovld __cnfn get_sub_group_gt_mask(void); uint4 __ovld __cnfn get_sub_group_le_mask(void); uint4 __ovld __cnfn get_sub_group_lt_mask(void); #if defined(cl_khr_fp16) half __ovld 
sub_group_non_uniform_broadcast( half value, uint index ); half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index ); half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index ); half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index ); half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index ); half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index ); half __ovld sub_group_broadcast_first( half value ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_non_uniform_broadcast( double value, uint index ); double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index ); double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index ); double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index ); double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index ); double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index ); double __ovld sub_group_broadcast_first( double value ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_ballot #if defined(cl_khr_subgroup_non_uniform_arithmetic) char __ovld sub_group_non_uniform_reduce_add( char value ); uchar __ovld sub_group_non_uniform_reduce_add( uchar value ); short __ovld sub_group_non_uniform_reduce_add( short value ); ushort __ovld sub_group_non_uniform_reduce_add( ushort value ); int __ovld sub_group_non_uniform_reduce_add( int value ); uint __ovld sub_group_non_uniform_reduce_add( uint value ); long __ovld sub_group_non_uniform_reduce_add( long value ); ulong __ovld sub_group_non_uniform_reduce_add( ulong value ); float __ovld sub_group_non_uniform_reduce_add( float value ); char __ovld sub_group_non_uniform_reduce_mul( char value ); uchar __ovld sub_group_non_uniform_reduce_mul( uchar value ); short __ovld sub_group_non_uniform_reduce_mul( short value ); ushort __ovld sub_group_non_uniform_reduce_mul( ushort value ); int __ovld sub_group_non_uniform_reduce_mul( int 
value ); uint __ovld sub_group_non_uniform_reduce_mul( uint value ); long __ovld sub_group_non_uniform_reduce_mul( long value ); ulong __ovld sub_group_non_uniform_reduce_mul( ulong value ); float __ovld sub_group_non_uniform_reduce_mul( float value ); char __ovld sub_group_non_uniform_reduce_min( char value ); uchar __ovld sub_group_non_uniform_reduce_min( uchar value ); short __ovld sub_group_non_uniform_reduce_min( short value ); ushort __ovld sub_group_non_uniform_reduce_min( ushort value ); int __ovld sub_group_non_uniform_reduce_min( int value ); uint __ovld sub_group_non_uniform_reduce_min( uint value ); long __ovld sub_group_non_uniform_reduce_min( long value ); ulong __ovld sub_group_non_uniform_reduce_min( ulong value ); float __ovld sub_group_non_uniform_reduce_min( float value ); char __ovld sub_group_non_uniform_reduce_max( char value ); uchar __ovld sub_group_non_uniform_reduce_max( uchar value ); short __ovld sub_group_non_uniform_reduce_max( short value ); ushort __ovld sub_group_non_uniform_reduce_max( ushort value ); int __ovld sub_group_non_uniform_reduce_max( int value ); uint __ovld sub_group_non_uniform_reduce_max( uint value ); long __ovld sub_group_non_uniform_reduce_max( long value ); ulong __ovld sub_group_non_uniform_reduce_max( ulong value ); float __ovld sub_group_non_uniform_reduce_max( float value ); char __ovld sub_group_non_uniform_scan_inclusive_add( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_add( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_add( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_add( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_add( float value ); char __ovld 
sub_group_non_uniform_scan_inclusive_mul( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_mul( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_mul( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_mul( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_mul( float value ); char __ovld sub_group_non_uniform_scan_inclusive_min( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_min( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_min( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_min( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_min( float value ); char __ovld sub_group_non_uniform_scan_inclusive_max( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_max( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_max( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_max( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value ); float __ovld sub_group_non_uniform_scan_inclusive_max( float value ); char __ovld sub_group_non_uniform_scan_exclusive_add( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value ); short 
__ovld sub_group_non_uniform_scan_exclusive_add( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_add( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_add( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_add( float value ); char __ovld sub_group_non_uniform_scan_exclusive_mul( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_mul( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_mul( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_mul( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_mul( float value ); char __ovld sub_group_non_uniform_scan_exclusive_min( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_min( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_min( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_min( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_min( float value ); char __ovld sub_group_non_uniform_scan_exclusive_max( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_max( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value 
); int __ovld sub_group_non_uniform_scan_exclusive_max( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_max( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value ); float __ovld sub_group_non_uniform_scan_exclusive_max( float value ); char __ovld sub_group_non_uniform_reduce_and( char value ); uchar __ovld sub_group_non_uniform_reduce_and( uchar value ); short __ovld sub_group_non_uniform_reduce_and( short value ); ushort __ovld sub_group_non_uniform_reduce_and( ushort value ); int __ovld sub_group_non_uniform_reduce_and( int value ); uint __ovld sub_group_non_uniform_reduce_and( uint value ); long __ovld sub_group_non_uniform_reduce_and( long value ); ulong __ovld sub_group_non_uniform_reduce_and( ulong value ); char __ovld sub_group_non_uniform_reduce_or( char value ); uchar __ovld sub_group_non_uniform_reduce_or( uchar value ); short __ovld sub_group_non_uniform_reduce_or( short value ); ushort __ovld sub_group_non_uniform_reduce_or( ushort value ); int __ovld sub_group_non_uniform_reduce_or( int value ); uint __ovld sub_group_non_uniform_reduce_or( uint value ); long __ovld sub_group_non_uniform_reduce_or( long value ); ulong __ovld sub_group_non_uniform_reduce_or( ulong value ); char __ovld sub_group_non_uniform_reduce_xor( char value ); uchar __ovld sub_group_non_uniform_reduce_xor( uchar value ); short __ovld sub_group_non_uniform_reduce_xor( short value ); ushort __ovld sub_group_non_uniform_reduce_xor( ushort value ); int __ovld sub_group_non_uniform_reduce_xor( int value ); uint __ovld sub_group_non_uniform_reduce_xor( uint value ); long __ovld sub_group_non_uniform_reduce_xor( long value ); ulong __ovld sub_group_non_uniform_reduce_xor( ulong value ); char __ovld sub_group_non_uniform_scan_inclusive_and( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_and( short 
value ); ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_and( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_and( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value ); char __ovld sub_group_non_uniform_scan_inclusive_or( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_or( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_or( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_or( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value ); char __ovld sub_group_non_uniform_scan_inclusive_xor( char value ); uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value ); short __ovld sub_group_non_uniform_scan_inclusive_xor( short value ); ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value ); int __ovld sub_group_non_uniform_scan_inclusive_xor( int value ); uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value ); long __ovld sub_group_non_uniform_scan_inclusive_xor( long value ); ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value ); char __ovld sub_group_non_uniform_scan_exclusive_and( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_and( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_and( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_and( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value 
); char __ovld sub_group_non_uniform_scan_exclusive_or( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_or( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_or( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_or( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value ); char __ovld sub_group_non_uniform_scan_exclusive_xor( char value ); uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value ); short __ovld sub_group_non_uniform_scan_exclusive_xor( short value ); ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value ); int __ovld sub_group_non_uniform_scan_exclusive_xor( int value ); uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value ); long __ovld sub_group_non_uniform_scan_exclusive_xor( long value ); ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value ); int __ovld sub_group_non_uniform_reduce_logical_and( int predicate ); int __ovld sub_group_non_uniform_reduce_logical_or( int predicate ); int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate ); int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate ); int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate ); int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ); int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate ); int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate ); int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ); #if defined(cl_khr_fp16) half __ovld sub_group_non_uniform_reduce_add( half value ); half __ovld sub_group_non_uniform_reduce_mul( half value ); half __ovld sub_group_non_uniform_reduce_min( half value ); half __ovld 
sub_group_non_uniform_reduce_max( half value ); half __ovld sub_group_non_uniform_scan_inclusive_add( half value ); half __ovld sub_group_non_uniform_scan_inclusive_mul( half value ); half __ovld sub_group_non_uniform_scan_inclusive_min( half value ); half __ovld sub_group_non_uniform_scan_inclusive_max( half value ); half __ovld sub_group_non_uniform_scan_exclusive_add( half value ); half __ovld sub_group_non_uniform_scan_exclusive_mul( half value ); half __ovld sub_group_non_uniform_scan_exclusive_min( half value ); half __ovld sub_group_non_uniform_scan_exclusive_max( half value ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_non_uniform_reduce_add( double value ); double __ovld sub_group_non_uniform_reduce_mul( double value ); double __ovld sub_group_non_uniform_reduce_min( double value ); double __ovld sub_group_non_uniform_reduce_max( double value ); double __ovld sub_group_non_uniform_scan_inclusive_add( double value ); double __ovld sub_group_non_uniform_scan_inclusive_mul( double value ); double __ovld sub_group_non_uniform_scan_inclusive_min( double value ); double __ovld sub_group_non_uniform_scan_inclusive_max( double value ); double __ovld sub_group_non_uniform_scan_exclusive_add( double value ); double __ovld sub_group_non_uniform_scan_exclusive_mul( double value ); double __ovld sub_group_non_uniform_scan_exclusive_min( double value ); double __ovld sub_group_non_uniform_scan_exclusive_max( double value ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_non_uniform_arithmetic #if defined(cl_khr_subgroup_shuffle) char __ovld sub_group_shuffle( char value, uint index ); uchar __ovld sub_group_shuffle( uchar value, uint index ); short __ovld sub_group_shuffle( short value, uint index ); ushort __ovld sub_group_shuffle( ushort value, uint index ); int __ovld sub_group_shuffle( int value, uint index ); uint __ovld sub_group_shuffle( uint value, uint index ); long __ovld sub_group_shuffle( long value, uint index ); ulong __ovld 
sub_group_shuffle( ulong value, uint index ); float __ovld sub_group_shuffle( float value, uint index ); char __ovld sub_group_shuffle_xor( char value, uint mask ); uchar __ovld sub_group_shuffle_xor( uchar value, uint mask ); short __ovld sub_group_shuffle_xor( short value, uint mask ); ushort __ovld sub_group_shuffle_xor( ushort value, uint mask ); int __ovld sub_group_shuffle_xor( int value, uint mask ); uint __ovld sub_group_shuffle_xor( uint value, uint mask ); long __ovld sub_group_shuffle_xor( long value, uint mask ); ulong __ovld sub_group_shuffle_xor( ulong value, uint mask ); float __ovld sub_group_shuffle_xor( float value, uint mask ); #if defined(cl_khr_fp16) half __ovld sub_group_shuffle( half value, uint index ); half __ovld sub_group_shuffle_xor( half value, uint mask ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_shuffle( double value, uint index ); double __ovld sub_group_shuffle_xor( double value, uint mask ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_shuffle #if defined(cl_khr_subgroup_shuffle_relative) char __ovld sub_group_shuffle_up( char value, uint delta ); uchar __ovld sub_group_shuffle_up( uchar value, uint delta ); short __ovld sub_group_shuffle_up( short value, uint delta ); ushort __ovld sub_group_shuffle_up( ushort value, uint delta ); int __ovld sub_group_shuffle_up( int value, uint delta ); uint __ovld sub_group_shuffle_up( uint value, uint delta ); long __ovld sub_group_shuffle_up( long value, uint delta ); ulong __ovld sub_group_shuffle_up( ulong value, uint delta ); float __ovld sub_group_shuffle_up( float value, uint delta ); char __ovld sub_group_shuffle_down( char value, uint delta ); uchar __ovld sub_group_shuffle_down( uchar value, uint delta ); short __ovld sub_group_shuffle_down( short value, uint delta ); ushort __ovld sub_group_shuffle_down( ushort value, uint delta ); int __ovld sub_group_shuffle_down( int value, uint delta ); uint __ovld sub_group_shuffle_down( uint value, uint delta ); 
long __ovld sub_group_shuffle_down( long value, uint delta ); ulong __ovld sub_group_shuffle_down( ulong value, uint delta ); float __ovld sub_group_shuffle_down( float value, uint delta ); #if defined(cl_khr_fp16) half __ovld sub_group_shuffle_up( half value, uint delta ); half __ovld sub_group_shuffle_down( half value, uint delta ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_shuffle_up( double value, uint delta ); double __ovld sub_group_shuffle_down( double value, uint delta ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_shuffle_relative #if defined(cl_khr_subgroup_clustered_reduce) char __ovld sub_group_clustered_reduce_add( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_add( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_add( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_add( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_add( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_mul( float value, uint 
clustersize ); char __ovld sub_group_clustered_reduce_min( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_min( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_min( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_min( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_min( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_max( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_max( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_max( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_max( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize ); float __ovld sub_group_clustered_reduce_max( float value, uint clustersize ); char __ovld sub_group_clustered_reduce_and( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_and( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_and( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_and( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_and( ulong value, 
uint clustersize ); char __ovld sub_group_clustered_reduce_or( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_or( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_or( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_or( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize ); char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize ); uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize ); short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize ); ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize ); int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize ); uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize ); long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize ); ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize ); int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ); int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ); int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ); #if defined(cl_khr_fp16) half __ovld sub_group_clustered_reduce_add( half value, uint clustersize ); half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize ); half __ovld sub_group_clustered_reduce_min( half value, uint clustersize ); half __ovld sub_group_clustered_reduce_max( half value, uint clustersize ); #endif // cl_khr_fp16 #if defined(cl_khr_fp64) double __ovld sub_group_clustered_reduce_add( double value, uint clustersize ); double __ovld 
sub_group_clustered_reduce_mul( double value, uint clustersize ); double __ovld sub_group_clustered_reduce_min( double value, uint clustersize ); double __ovld sub_group_clustered_reduce_max( double value, uint clustersize ); #endif // cl_khr_fp64 #endif // cl_khr_subgroup_clustered_reduce #if defined(cl_khr_extended_bit_ops) char __ovld __cnfn bitfield_insert(char, char, uint, uint); uchar __ovld __cnfn bitfield_insert(uchar, uchar, uint, uint); short __ovld __cnfn bitfield_insert(short, short, uint, uint); ushort __ovld __cnfn bitfield_insert(ushort, ushort, uint, uint); int __ovld __cnfn bitfield_insert(int, int, uint, uint); uint __ovld __cnfn bitfield_insert(uint, uint, uint, uint); long __ovld __cnfn bitfield_insert(long, long, uint, uint); ulong __ovld __cnfn bitfield_insert(ulong, ulong, uint, uint); char2 __ovld __cnfn bitfield_insert(char2, char2, uint, uint); uchar2 __ovld __cnfn bitfield_insert(uchar2, uchar2, uint, uint); short2 __ovld __cnfn bitfield_insert(short2, short2, uint, uint); ushort2 __ovld __cnfn bitfield_insert(ushort2, ushort2, uint, uint); int2 __ovld __cnfn bitfield_insert(int2, int2, uint, uint); uint2 __ovld __cnfn bitfield_insert(uint2, uint2, uint, uint); long2 __ovld __cnfn bitfield_insert(long2, long2, uint, uint); ulong2 __ovld __cnfn bitfield_insert(ulong2, ulong2, uint, uint); char3 __ovld __cnfn bitfield_insert(char3, char3, uint, uint); uchar3 __ovld __cnfn bitfield_insert(uchar3, uchar3, uint, uint); short3 __ovld __cnfn bitfield_insert(short3, short3, uint, uint); ushort3 __ovld __cnfn bitfield_insert(ushort3, ushort3, uint, uint); int3 __ovld __cnfn bitfield_insert(int3, int3, uint, uint); uint3 __ovld __cnfn bitfield_insert(uint3, uint3, uint, uint); long3 __ovld __cnfn bitfield_insert(long3, long3, uint, uint); ulong3 __ovld __cnfn bitfield_insert(ulong3, ulong3, uint, uint); char4 __ovld __cnfn bitfield_insert(char4, char4, uint, uint); uchar4 __ovld __cnfn bitfield_insert(uchar4, uchar4, uint, uint); short4 __ovld 
__cnfn bitfield_insert(short4, short4, uint, uint); ushort4 __ovld __cnfn bitfield_insert(ushort4, ushort4, uint, uint); int4 __ovld __cnfn bitfield_insert(int4, int4, uint, uint); uint4 __ovld __cnfn bitfield_insert(uint4, uint4, uint, uint); long4 __ovld __cnfn bitfield_insert(long4, long4, uint, uint); ulong4 __ovld __cnfn bitfield_insert(ulong4, ulong4, uint, uint); char8 __ovld __cnfn bitfield_insert(char8, char8, uint, uint); uchar8 __ovld __cnfn bitfield_insert(uchar8, uchar8, uint, uint); short8 __ovld __cnfn bitfield_insert(short8, short8, uint, uint); ushort8 __ovld __cnfn bitfield_insert(ushort8, ushort8, uint, uint); int8 __ovld __cnfn bitfield_insert(int8, int8, uint, uint); uint8 __ovld __cnfn bitfield_insert(uint8, uint8, uint, uint); long8 __ovld __cnfn bitfield_insert(long8, long8, uint, uint); ulong8 __ovld __cnfn bitfield_insert(ulong8, ulong8, uint, uint); char16 __ovld __cnfn bitfield_insert(char16, char16, uint, uint); uchar16 __ovld __cnfn bitfield_insert(uchar16, uchar16, uint, uint); short16 __ovld __cnfn bitfield_insert(short16, short16, uint, uint); ushort16 __ovld __cnfn bitfield_insert(ushort16, ushort16, uint, uint); int16 __ovld __cnfn bitfield_insert(int16, int16, uint, uint); uint16 __ovld __cnfn bitfield_insert(uint16, uint16, uint, uint); long16 __ovld __cnfn bitfield_insert(long16, long16, uint, uint); ulong16 __ovld __cnfn bitfield_insert(ulong16, ulong16, uint, uint); char __ovld __cnfn bitfield_extract_signed(char, uint, uint); short __ovld __cnfn bitfield_extract_signed(short, uint, uint); int __ovld __cnfn bitfield_extract_signed(int, uint, uint); long __ovld __cnfn bitfield_extract_signed(long, uint, uint); char2 __ovld __cnfn bitfield_extract_signed(char2, uint, uint); short2 __ovld __cnfn bitfield_extract_signed(short2, uint, uint); int2 __ovld __cnfn bitfield_extract_signed(int2, uint, uint); long2 __ovld __cnfn bitfield_extract_signed(long2, uint, uint); char3 __ovld __cnfn bitfield_extract_signed(char3, uint, uint); 
short3 __ovld __cnfn bitfield_extract_signed(short3, uint, uint); int3 __ovld __cnfn bitfield_extract_signed(int3, uint, uint); long3 __ovld __cnfn bitfield_extract_signed(long3, uint, uint); char4 __ovld __cnfn bitfield_extract_signed(char4, uint, uint); short4 __ovld __cnfn bitfield_extract_signed(short4, uint, uint); int4 __ovld __cnfn bitfield_extract_signed(int4, uint, uint); long4 __ovld __cnfn bitfield_extract_signed(long4, uint, uint); char8 __ovld __cnfn bitfield_extract_signed(char8, uint, uint); short8 __ovld __cnfn bitfield_extract_signed(short8, uint, uint); int8 __ovld __cnfn bitfield_extract_signed(int8, uint, uint); long8 __ovld __cnfn bitfield_extract_signed(long8, uint, uint); char16 __ovld __cnfn bitfield_extract_signed(char16, uint, uint); short16 __ovld __cnfn bitfield_extract_signed(short16, uint, uint); int16 __ovld __cnfn bitfield_extract_signed(int16, uint, uint); long16 __ovld __cnfn bitfield_extract_signed(long16, uint, uint); char __ovld __cnfn bitfield_extract_signed(uchar, uint, uint); short __ovld __cnfn bitfield_extract_signed(ushort, uint, uint); int __ovld __cnfn bitfield_extract_signed(uint, uint, uint); long __ovld __cnfn bitfield_extract_signed(ulong, uint, uint); char2 __ovld __cnfn bitfield_extract_signed(uchar2, uint, uint); short2 __ovld __cnfn bitfield_extract_signed(ushort2, uint, uint); int2 __ovld __cnfn bitfield_extract_signed(uint2, uint, uint); long2 __ovld __cnfn bitfield_extract_signed(ulong2, uint, uint); char3 __ovld __cnfn bitfield_extract_signed(uchar3, uint, uint); short3 __ovld __cnfn bitfield_extract_signed(ushort3, uint, uint); int3 __ovld __cnfn bitfield_extract_signed(uint3, uint, uint); long3 __ovld __cnfn bitfield_extract_signed(ulong3, uint, uint); char4 __ovld __cnfn bitfield_extract_signed(uchar4, uint, uint); short4 __ovld __cnfn bitfield_extract_signed(ushort4, uint, uint); int4 __ovld __cnfn bitfield_extract_signed(uint4, uint, uint); long4 __ovld __cnfn bitfield_extract_signed(ulong4, uint, uint); 
char8 __ovld __cnfn bitfield_extract_signed(uchar8, uint, uint); short8 __ovld __cnfn bitfield_extract_signed(ushort8, uint, uint); int8 __ovld __cnfn bitfield_extract_signed(uint8, uint, uint); long8 __ovld __cnfn bitfield_extract_signed(ulong8, uint, uint); char16 __ovld __cnfn bitfield_extract_signed(uchar16, uint, uint); short16 __ovld __cnfn bitfield_extract_signed(ushort16, uint, uint); int16 __ovld __cnfn bitfield_extract_signed(uint16, uint, uint); long16 __ovld __cnfn bitfield_extract_signed(ulong16, uint, uint); uchar __ovld __cnfn bitfield_extract_unsigned(char, uint, uint); ushort __ovld __cnfn bitfield_extract_unsigned(short, uint, uint); uint __ovld __cnfn bitfield_extract_unsigned(int, uint, uint); ulong __ovld __cnfn bitfield_extract_unsigned(long, uint, uint); uchar2 __ovld __cnfn bitfield_extract_unsigned(char2, uint, uint); ushort2 __ovld __cnfn bitfield_extract_unsigned(short2, uint, uint); uint2 __ovld __cnfn bitfield_extract_unsigned(int2, uint, uint); ulong2 __ovld __cnfn bitfield_extract_unsigned(long2, uint, uint); uchar3 __ovld __cnfn bitfield_extract_unsigned(char3, uint, uint); ushort3 __ovld __cnfn bitfield_extract_unsigned(short3, uint, uint); uint3 __ovld __cnfn bitfield_extract_unsigned(int3, uint, uint); ulong3 __ovld __cnfn bitfield_extract_unsigned(long3, uint, uint); uchar4 __ovld __cnfn bitfield_extract_unsigned(char4, uint, uint); ushort4 __ovld __cnfn bitfield_extract_unsigned(short4, uint, uint); uint4 __ovld __cnfn bitfield_extract_unsigned(int4, uint, uint); ulong4 __ovld __cnfn bitfield_extract_unsigned(long4, uint, uint); uchar8 __ovld __cnfn bitfield_extract_unsigned(char8, uint, uint); ushort8 __ovld __cnfn bitfield_extract_unsigned(short8, uint, uint); uint8 __ovld __cnfn bitfield_extract_unsigned(int8, uint, uint); ulong8 __ovld __cnfn bitfield_extract_unsigned(long8, uint, uint); uchar16 __ovld __cnfn bitfield_extract_unsigned(char16, uint, uint); ushort16 __ovld __cnfn bitfield_extract_unsigned(short16, uint, uint); 
uint16 __ovld __cnfn bitfield_extract_unsigned(int16, uint, uint); ulong16 __ovld __cnfn bitfield_extract_unsigned(long16, uint, uint); uchar __ovld __cnfn bitfield_extract_unsigned(uchar, uint, uint); ushort __ovld __cnfn bitfield_extract_unsigned(ushort, uint, uint); uint __ovld __cnfn bitfield_extract_unsigned(uint, uint, uint); ulong __ovld __cnfn bitfield_extract_unsigned(ulong, uint, uint); uchar2 __ovld __cnfn bitfield_extract_unsigned(uchar2, uint, uint); ushort2 __ovld __cnfn bitfield_extract_unsigned(ushort2, uint, uint); uint2 __ovld __cnfn bitfield_extract_unsigned(uint2, uint, uint); ulong2 __ovld __cnfn bitfield_extract_unsigned(ulong2, uint, uint); uchar3 __ovld __cnfn bitfield_extract_unsigned(uchar3, uint, uint); ushort3 __ovld __cnfn bitfield_extract_unsigned(ushort3, uint, uint); uint3 __ovld __cnfn bitfield_extract_unsigned(uint3, uint, uint); ulong3 __ovld __cnfn bitfield_extract_unsigned(ulong3, uint, uint); uchar4 __ovld __cnfn bitfield_extract_unsigned(uchar4, uint, uint); ushort4 __ovld __cnfn bitfield_extract_unsigned(ushort4, uint, uint); uint4 __ovld __cnfn bitfield_extract_unsigned(uint4, uint, uint); ulong4 __ovld __cnfn bitfield_extract_unsigned(ulong4, uint, uint); uchar8 __ovld __cnfn bitfield_extract_unsigned(uchar8, uint, uint); ushort8 __ovld __cnfn bitfield_extract_unsigned(ushort8, uint, uint); uint8 __ovld __cnfn bitfield_extract_unsigned(uint8, uint, uint); ulong8 __ovld __cnfn bitfield_extract_unsigned(ulong8, uint, uint); uchar16 __ovld __cnfn bitfield_extract_unsigned(uchar16, uint, uint); ushort16 __ovld __cnfn bitfield_extract_unsigned(ushort16, uint, uint); uint16 __ovld __cnfn bitfield_extract_unsigned(uint16, uint, uint); ulong16 __ovld __cnfn bitfield_extract_unsigned(ulong16, uint, uint); char __ovld __cnfn bit_reverse(char); uchar __ovld __cnfn bit_reverse(uchar); short __ovld __cnfn bit_reverse(short); ushort __ovld __cnfn bit_reverse(ushort); int __ovld __cnfn bit_reverse(int); uint __ovld __cnfn 
bit_reverse(uint); long __ovld __cnfn bit_reverse(long); ulong __ovld __cnfn bit_reverse(ulong); char2 __ovld __cnfn bit_reverse(char2); uchar2 __ovld __cnfn bit_reverse(uchar2); short2 __ovld __cnfn bit_reverse(short2); ushort2 __ovld __cnfn bit_reverse(ushort2); int2 __ovld __cnfn bit_reverse(int2); uint2 __ovld __cnfn bit_reverse(uint2); long2 __ovld __cnfn bit_reverse(long2); ulong2 __ovld __cnfn bit_reverse(ulong2); char3 __ovld __cnfn bit_reverse(char3); uchar3 __ovld __cnfn bit_reverse(uchar3); short3 __ovld __cnfn bit_reverse(short3); ushort3 __ovld __cnfn bit_reverse(ushort3); int3 __ovld __cnfn bit_reverse(int3); uint3 __ovld __cnfn bit_reverse(uint3); long3 __ovld __cnfn bit_reverse(long3); ulong3 __ovld __cnfn bit_reverse(ulong3); char4 __ovld __cnfn bit_reverse(char4); uchar4 __ovld __cnfn bit_reverse(uchar4); short4 __ovld __cnfn bit_reverse(short4); ushort4 __ovld __cnfn bit_reverse(ushort4); int4 __ovld __cnfn bit_reverse(int4); uint4 __ovld __cnfn bit_reverse(uint4); long4 __ovld __cnfn bit_reverse(long4); ulong4 __ovld __cnfn bit_reverse(ulong4); char8 __ovld __cnfn bit_reverse(char8); uchar8 __ovld __cnfn bit_reverse(uchar8); short8 __ovld __cnfn bit_reverse(short8); ushort8 __ovld __cnfn bit_reverse(ushort8); int8 __ovld __cnfn bit_reverse(int8); uint8 __ovld __cnfn bit_reverse(uint8); long8 __ovld __cnfn bit_reverse(long8); ulong8 __ovld __cnfn bit_reverse(ulong8); char16 __ovld __cnfn bit_reverse(char16); uchar16 __ovld __cnfn bit_reverse(uchar16); short16 __ovld __cnfn bit_reverse(short16); ushort16 __ovld __cnfn bit_reverse(ushort16); int16 __ovld __cnfn bit_reverse(int16); uint16 __ovld __cnfn bit_reverse(uint16); long16 __ovld __cnfn bit_reverse(long16); ulong16 __ovld __cnfn bit_reverse(ulong16); #endif // cl_khr_extended_bit_ops #if defined(__opencl_c_integer_dot_product_input_4x8bit) uint __ovld __cnfn dot(uchar4, uchar4); int __ovld __cnfn dot(char4, char4); int __ovld __cnfn dot(uchar4, char4); int __ovld __cnfn dot(char4, uchar4); 
uint __ovld __cnfn dot_acc_sat(uchar4, uchar4, uint); int __ovld __cnfn dot_acc_sat(char4, char4, int); int __ovld __cnfn dot_acc_sat(uchar4, char4, int); int __ovld __cnfn dot_acc_sat(char4, uchar4, int); #endif // __opencl_c_integer_dot_product_input_4x8bit #if defined(__opencl_c_integer_dot_product_input_4x8bit_packed) uint __ovld __cnfn dot_4x8packed_uu_uint(uint, uint); int __ovld __cnfn dot_4x8packed_ss_int(uint, uint); int __ovld __cnfn dot_4x8packed_us_int(uint, uint); int __ovld __cnfn dot_4x8packed_su_int(uint, uint); uint __ovld __cnfn dot_acc_sat_4x8packed_uu_uint(uint, uint, uint); int __ovld __cnfn dot_acc_sat_4x8packed_ss_int(uint, uint, int); int __ovld __cnfn dot_acc_sat_4x8packed_us_int(uint, uint, int); int __ovld __cnfn dot_acc_sat_4x8packed_su_int(uint, uint, int); #endif // __opencl_c_integer_dot_product_input_4x8bit_packed #if defined(cl_khr_subgroup_rotate) char __ovld __conv sub_group_rotate(char, int); uchar __ovld __conv sub_group_rotate(uchar, int); short __ovld __conv sub_group_rotate(short, int); ushort __ovld __conv sub_group_rotate(ushort, int); int __ovld __conv sub_group_rotate(int, int); uint __ovld __conv sub_group_rotate(uint, int); long __ovld __conv sub_group_rotate(long, int); ulong __ovld __conv sub_group_rotate(ulong, int); float __ovld __conv sub_group_rotate(float, int); #if defined(cl_khr_fp64) double __ovld __conv sub_group_rotate(double, int); #endif // cl_khr_fp64 #if defined(cl_khr_fp16) half __ovld __conv sub_group_rotate(half, int); #endif // cl_khr_fp16 char __ovld __conv sub_group_clustered_rotate(char, int, uint); uchar __ovld __conv sub_group_clustered_rotate(uchar, int, uint); short __ovld __conv sub_group_clustered_rotate(short, int, uint); ushort __ovld __conv sub_group_clustered_rotate(ushort, int, uint); int __ovld __conv sub_group_clustered_rotate(int, int, uint); uint __ovld __conv sub_group_clustered_rotate(uint, int, uint); long __ovld __conv sub_group_clustered_rotate(long, int, uint); ulong __ovld 
__conv sub_group_clustered_rotate(ulong, int, uint); float __ovld __conv sub_group_clustered_rotate(float, int, uint); #if defined(cl_khr_fp64) double __ovld __conv sub_group_clustered_rotate(double, int, uint); #endif // cl_khr_fp64 #if defined(cl_khr_fp16) half __ovld __conv sub_group_clustered_rotate(half, int, uint); #endif // cl_khr_fp16 #endif // cl_khr_subgroup_rotate #if defined(cl_intel_subgroups) // Intel-Specific Sub Group Functions float __ovld __conv intel_sub_group_shuffle( float , uint ); float2 __ovld __conv intel_sub_group_shuffle( float2, uint ); float3 __ovld __conv intel_sub_group_shuffle( float3, uint ); float4 __ovld __conv intel_sub_group_shuffle( float4, uint ); float8 __ovld __conv intel_sub_group_shuffle( float8, uint ); float16 __ovld __conv intel_sub_group_shuffle( float16, uint ); int __ovld __conv intel_sub_group_shuffle( int , uint ); int2 __ovld __conv intel_sub_group_shuffle( int2, uint ); int3 __ovld __conv intel_sub_group_shuffle( int3, uint ); int4 __ovld __conv intel_sub_group_shuffle( int4, uint ); int8 __ovld __conv intel_sub_group_shuffle( int8, uint ); int16 __ovld __conv intel_sub_group_shuffle( int16, uint ); uint __ovld __conv intel_sub_group_shuffle( uint , uint ); uint2 __ovld __conv intel_sub_group_shuffle( uint2, uint ); uint3 __ovld __conv intel_sub_group_shuffle( uint3, uint ); uint4 __ovld __conv intel_sub_group_shuffle( uint4, uint ); uint8 __ovld __conv intel_sub_group_shuffle( uint8, uint ); uint16 __ovld __conv intel_sub_group_shuffle( uint16, uint ); long __ovld __conv intel_sub_group_shuffle( long, uint ); ulong __ovld __conv intel_sub_group_shuffle( ulong, uint ); float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint ); float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint ); float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint ); float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint ); float8 __ovld 
__conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint ); float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint ); int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint ); int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint ); int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint ); int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint ); int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint ); int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint ); uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint ); uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint ); uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint ); uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint ); uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint ); uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint ); long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint ); ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint ); float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint ); float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint ); float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint ); float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint ); float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint ); float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint ); int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint ); int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint ); int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint ); int4 __ovld 
__conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint ); int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint ); int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint ); uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint ); uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint ); uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint ); uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint ); uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint ); uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint ); long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint ); ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint ); float __ovld __conv intel_sub_group_shuffle_xor( float , uint ); float2 __ovld __conv intel_sub_group_shuffle_xor( float2, uint ); float3 __ovld __conv intel_sub_group_shuffle_xor( float3, uint ); float4 __ovld __conv intel_sub_group_shuffle_xor( float4, uint ); float8 __ovld __conv intel_sub_group_shuffle_xor( float8, uint ); float16 __ovld __conv intel_sub_group_shuffle_xor( float16, uint ); int __ovld __conv intel_sub_group_shuffle_xor( int , uint ); int2 __ovld __conv intel_sub_group_shuffle_xor( int2, uint ); int3 __ovld __conv intel_sub_group_shuffle_xor( int3, uint ); int4 __ovld __conv intel_sub_group_shuffle_xor( int4, uint ); int8 __ovld __conv intel_sub_group_shuffle_xor( int8, uint ); int16 __ovld __conv intel_sub_group_shuffle_xor( int16, uint ); uint __ovld __conv intel_sub_group_shuffle_xor( uint , uint ); uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2, uint ); uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3, uint ); uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4, uint ); uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8, uint ); uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16, uint 
); long __ovld __conv intel_sub_group_shuffle_xor( long, uint ); ulong __ovld __conv intel_sub_group_shuffle_xor( ulong, uint ); #if defined(__opencl_c_images) uint __ovld __conv intel_sub_group_block_read(read_only image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read2(read_only image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read4(read_only image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read8(read_only image2d_t, int2); #endif #if defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read(read_write image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t, int2); #endif // defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); #if defined(__opencl_c_images) void __ovld __conv intel_sub_group_block_write(write_only image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write2(write_only image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write4(write_only image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write8(write_only image2d_t, int2, uint8); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write(read_write image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write2(read_write image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write4(read_write image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write8(read_write image2d_t, int2, uint8); #endif // defined(__opencl_c_read_write_images) void 
__ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data ); void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data ); void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data ); #ifdef cl_khr_fp16 half __ovld __conv intel_sub_group_shuffle( half, uint ); half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint ); half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint ); half __ovld __conv intel_sub_group_shuffle_xor( half, uint ); #endif #if defined(cl_khr_fp64) double __ovld __conv intel_sub_group_shuffle( double, uint ); double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint ); double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint ); double __ovld __conv intel_sub_group_shuffle_xor( double, uint ); #endif #endif //cl_intel_subgroups #if defined(cl_intel_subgroups_short) short __ovld __conv intel_sub_group_broadcast( short , uint sub_group_local_id ); short2 __ovld __conv intel_sub_group_broadcast( short2, uint sub_group_local_id ); short3 __ovld __conv intel_sub_group_broadcast( short3, uint sub_group_local_id ); short4 __ovld __conv intel_sub_group_broadcast( short4, uint sub_group_local_id ); short8 __ovld __conv intel_sub_group_broadcast( short8, uint sub_group_local_id ); ushort __ovld __conv intel_sub_group_broadcast( ushort , uint sub_group_local_id ); ushort2 __ovld __conv intel_sub_group_broadcast( ushort2, uint sub_group_local_id ); ushort3 __ovld __conv intel_sub_group_broadcast( ushort3, uint sub_group_local_id ); ushort4 __ovld __conv intel_sub_group_broadcast( ushort4, uint sub_group_local_id ); ushort8 __ovld __conv intel_sub_group_broadcast( ushort8, uint sub_group_local_id ); short __ovld __conv intel_sub_group_shuffle( short , uint ); short2 __ovld __conv intel_sub_group_shuffle( short2 , uint ); short3 __ovld __conv 
intel_sub_group_shuffle( short3 , uint ); short4 __ovld __conv intel_sub_group_shuffle( short4 , uint ); short8 __ovld __conv intel_sub_group_shuffle( short8 , uint ); short16 __ovld __conv intel_sub_group_shuffle( short16, uint); ushort __ovld __conv intel_sub_group_shuffle( ushort , uint ); ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 , uint ); ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 , uint ); ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 , uint ); ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 , uint ); ushort16 __ovld __conv intel_sub_group_shuffle( ushort16, uint ); short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint ); short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint ); short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint ); short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint ); short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint ); short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint ); ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint ); ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint ); ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint ); ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint ); ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint ); ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint ); short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint ); short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint ); short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint ); short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint ); short8 __ovld __conv 
intel_sub_group_shuffle_up( short8 cur, short8 next, uint ); short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint ); ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint ); ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint ); ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint ); ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint ); ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint ); ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint ); short __ovld __conv intel_sub_group_shuffle_xor( short , uint ); short2 __ovld __conv intel_sub_group_shuffle_xor( short2 , uint ); short3 __ovld __conv intel_sub_group_shuffle_xor( short3 , uint ); short4 __ovld __conv intel_sub_group_shuffle_xor( short4 , uint ); short8 __ovld __conv intel_sub_group_shuffle_xor( short8 , uint ); short16 __ovld __conv intel_sub_group_shuffle_xor( short16, uint ); ushort __ovld __conv intel_sub_group_shuffle_xor( ushort , uint ); ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 , uint ); ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 , uint ); ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 , uint ); ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 , uint ); ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16, uint ); short __ovld __conv intel_sub_group_reduce_add( short x ); ushort __ovld __conv intel_sub_group_reduce_add( ushort x ); short __ovld __conv intel_sub_group_reduce_min( short x ); ushort __ovld __conv intel_sub_group_reduce_min( ushort x ); short __ovld __conv intel_sub_group_reduce_max( short x ); ushort __ovld __conv intel_sub_group_reduce_max( ushort x ); short __ovld __conv intel_sub_group_scan_exclusive_add( short x ); ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x ); short __ovld __conv 
intel_sub_group_scan_exclusive_min( short x ); ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x ); short __ovld __conv intel_sub_group_scan_exclusive_max( short x ); ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x ); short __ovld __conv intel_sub_group_scan_inclusive_add( short x ); ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x ); short __ovld __conv intel_sub_group_scan_inclusive_min( short x ); ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); #if defined(__opencl_c_images) uint __ovld __conv intel_sub_group_block_read_ui(read_only image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read_ui2(read_only image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read_ui4(read_only image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read_ui8(read_only image2d_t, int2); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read_ui(read_write image2d_t, int2); uint2 __ovld __conv intel_sub_group_block_read_ui2(read_write image2d_t, int2); uint4 __ovld __conv intel_sub_group_block_read_ui4(read_write image2d_t, int2); uint8 __ovld __conv intel_sub_group_block_read_ui8(read_write image2d_t, int2); #endif // defined(__opencl_c_read_write_images) uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p ); uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); #if defined(__opencl_c_images) void __ovld __conv intel_sub_group_block_write_ui(read_only image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write_ui2(read_only image2d_t, int2, uint2); void __ovld __conv 
intel_sub_group_block_write_ui4(read_only image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write_ui8(read_only image2d_t, int2, uint8); #endif //defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_ui(read_write image2d_t, int2, uint); void __ovld __conv intel_sub_group_block_write_ui2(read_write image2d_t, int2, uint2); void __ovld __conv intel_sub_group_block_write_ui4(read_write image2d_t, int2, uint4); void __ovld __conv intel_sub_group_block_write_ui8(read_write image2d_t, int2, uint8); #endif // defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data ); void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); #if defined(__opencl_c_images) ushort __ovld __conv intel_sub_group_block_read_us(read_only image2d_t, int2); ushort2 __ovld __conv intel_sub_group_block_read_us2(read_only image2d_t, int2); ushort4 __ovld __conv intel_sub_group_block_read_us4(read_only image2d_t, int2); ushort8 __ovld __conv intel_sub_group_block_read_us8(read_only image2d_t, int2); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t, int2); ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t, int2); ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t, int2); ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t, int2); #endif // defined(__opencl_c_read_write_images) ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p ); ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global 
ushort* p ); ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); #if defined(__opencl_c_images) void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t, int2, ushort); void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t, int2, ushort2); void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t, int2, ushort4); void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t, int2, ushort8); #endif // defined(__opencl_c_images) #if defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t, int2, ushort); void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t, int2, ushort2); void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t, int2, ushort4); void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t, int2, ushort8); #endif // defined(__opencl_c_read_write_images) void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data ); void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data ); void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data ); #endif // cl_intel_subgroups_short #ifdef cl_intel_device_side_avc_motion_estimation #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin // MCE built-in functions uchar __ovld intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty( uchar slice_type, uchar qp); ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty( uchar slice_type, uchar qp); uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty( uchar slice_type, uchar qp); uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty( uchar slice_type, uchar qp); uint2 __ovld intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table( 
uchar slice_type, uchar qp); uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty( uchar slice_type, uchar qp); uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table(); uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table(); uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table(); uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty(); uchar __ovld intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty(); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_inter_shape_penalty( ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_ac_only_haar( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_mce_payload_t payload); ulong __ovld intel_sub_group_avc_mce_get_motion_vectors( 
intel_sub_group_avc_mce_result_t result); ushort __ovld intel_sub_group_avc_mce_get_inter_distortions( intel_sub_group_avc_mce_result_t result); ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_directions( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count( intel_sub_group_avc_mce_result_t result); uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids( intel_sub_group_avc_mce_result_t result); uchar __ovld intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities( uint packed_reference_ids, uint packed_reference_parameter_field_polarities, intel_sub_group_avc_mce_result_t result); // IME built-in functions intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_initialize( ushort2 src_coord, uchar partition_mask, uchar sad_adjustment); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_single_reference( short2 ref_offset, uchar search_window_config, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_dual_reference( short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_max_motion_vector_count( uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_unidirectional_mix_disable( intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_early_search_termination_threshold( uchar threshold, intel_sub_group_avc_ime_payload_t payload); 
intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_weighted_sad( uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload); __attribute__((deprecated("If you use the latest Intel driver, please use " "intel_sub_group_avc_ime_ref_window_size instead", "intel_sub_group_avc_ime_ref_window_size"))) ushort2 __ovld intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref); ushort2 __ovld intel_sub_group_avc_ime_ref_window_size( uchar search_window_config, char dual_ref); short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, ushort2 image_size); #if defined(__opencl_c_images) intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference_streamout( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference_streamin( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, 
intel_sub_group_avc_ime_single_reference_streamin_t streamin_components); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_single_reference_streamin_t streamin_components); intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload, intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); #endif intel_sub_group_avc_ime_single_reference_streamin_t __ovld intel_sub_group_avc_ime_get_single_reference_streamin( intel_sub_group_avc_ime_result_single_reference_streamout_t result); intel_sub_group_avc_ime_dual_reference_streamin_t __ovld intel_sub_group_avc_ime_get_dual_reference_streamin( intel_sub_group_avc_ime_result_dual_reference_streamout_t result); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_strip_single_reference_streamout( intel_sub_group_avc_ime_result_single_reference_streamout_t result); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_ime_strip_dual_reference_streamout( intel_sub_group_avc_ime_result_dual_reference_streamout_t result); uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors( intel_sub_group_avc_ime_result_single_reference_streamout_t result, uchar 
major_shape); ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions( intel_sub_group_avc_ime_result_single_reference_streamout_t result, uchar major_shape); uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids( intel_sub_group_avc_ime_result_single_reference_streamout_t result, uchar major_shape); uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors( intel_sub_group_avc_ime_result_dual_reference_streamout_t result, uchar major_shape, uchar direction); ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions( intel_sub_group_avc_ime_result_dual_reference_streamout_t result, uchar major_shape, uchar direction); uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids( intel_sub_group_avc_ime_result_dual_reference_streamout_t result, uchar major_shape, uchar direction); uchar __ovld intel_sub_group_avc_ime_get_border_reached( uchar image_select, intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ime_get_unidirectional_early_search_termination( intel_sub_group_avc_ime_result_t result); uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector( intel_sub_group_avc_ime_result_t result); ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion( intel_sub_group_avc_ime_result_t result); // REF built-in functions intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_fme_initialize( ushort2 src_coord, ulong motion_vectors, uchar major_shapes, uchar minor_shapes, uchar directions, uchar pixel_resolution, uchar sad_adjustment); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_bme_initialize( ushort2 src_coord, ulong motion_vectors, uchar major_shapes, uchar minor_shapes, uchar directions, uchar pixel_resolution, uchar bidirectional_weight, uchar sad_adjustment); 
intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_bidirectional_mix_disable( intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_bilinear_filter_enable( intel_sub_group_avc_ref_payload_t payload); #if defined(__opencl_c_images) intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_single_reference( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_dual_reference( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_ref_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, uchar packed_reference_field_polarities, sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload); #endif //defined(__opencl_c_images) // SIC built-in functions intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_initialize( ushort2 src_coord); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_configure_skc( uint skip_block_partition_type, uint skip_motion_vector_mask, ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_configure_ipe( uchar luma_intra_partition_mask, uchar intra_neighbour_availability, uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel, uchar upper_edge_luma_pixels, uchar 
upper_right_edge_luma_pixels, uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_configure_ipe( uchar luma_intra_partition_mask, uchar intra_neighbour_availability, uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel, uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels, ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel, ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload); uint __ovld intel_sub_group_avc_sic_get_motion_vector_mask( uint skip_block_partition_type, uchar direction); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_intra_luma_shape_penalty( uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_intra_luma_mode_cost_function( uchar luma_mode_penalty, uint luma_packed_neighbor_modes, uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function( uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_skc_bilinear_filter_enable( intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_skc_forward_transform_enable( ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_block_based_raw_skip_sad( uchar block_based_skip_type, intel_sub_group_avc_sic_payload_t payload); #if defined(__opencl_c_images) intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_ipe( read_only image2d_t src_image, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld 
intel_sub_group_avc_sic_evaluate_with_single_reference( read_only image2d_t src_image, read_only image2d_t ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_dual_reference( read_only image2d_t src_image, read_only image2d_t fwd_ref_image, read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_sic_evaluate_with_multi_reference( read_only image2d_t src_image, uint packed_reference_ids, uchar packed_reference_field_polarities, sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload); #endif //defined(__opencl_c_images) uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion( intel_sub_group_avc_sic_result_t result); ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes( intel_sub_group_avc_sic_result_t result); uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode( intel_sub_group_avc_sic_result_t result); uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold( intel_sub_group_avc_sic_result_t result); ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads( intel_sub_group_avc_sic_result_t result); // Wrappers intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, 
intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty( uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_inter_shape_penalty( ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_inter_shape_penalty( ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_inter_shape_penalty( ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_inter_direction_penalty( uchar direction_cost, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_motion_vector_cost_function( ulong packed_cost_center_delta, uint2 packed_cost_table, uchar cost_precision, 
intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_source_interlaced_field_polarity( uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity( uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities( uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity, intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_ime_set_ac_only_haar( intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_ref_set_ac_only_haar( 
intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_sic_set_ac_only_haar( intel_sub_group_avc_sic_payload_t payload); ulong __ovld intel_sub_group_avc_ime_get_motion_vectors( intel_sub_group_avc_ime_result_t result); ulong __ovld intel_sub_group_avc_ref_get_motion_vectors( intel_sub_group_avc_ref_result_t result); ushort __ovld intel_sub_group_avc_ime_get_inter_distortions( intel_sub_group_avc_ime_result_t result); ushort __ovld intel_sub_group_avc_ref_get_inter_distortions( intel_sub_group_avc_ref_result_t result); ushort __ovld intel_sub_group_avc_sic_get_inter_distortions( intel_sub_group_avc_sic_result_t result); ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion( intel_sub_group_avc_ime_result_t result); ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_directions( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_directions( intel_sub_group_avc_ref_result_t result); uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count( intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count( intel_sub_group_avc_ref_result_t result); uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids( intel_sub_group_avc_ime_result_t result); uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids( intel_sub_group_avc_ref_result_t result); uchar __ovld 
intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities( uint packed_reference_ids, uint packed_reference_parameter_field_polarities, intel_sub_group_avc_ime_result_t result); uchar __ovld intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities( uint packed_reference_ids, uint packed_reference_parameter_field_polarities, intel_sub_group_avc_ref_result_t result); // Type conversion functions intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_ime_convert_to_mce_payload( intel_sub_group_avc_ime_payload_t payload); intel_sub_group_avc_ime_payload_t __ovld intel_sub_group_avc_mce_convert_to_ime_payload( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_ref_convert_to_mce_payload( intel_sub_group_avc_ref_payload_t payload); intel_sub_group_avc_ref_payload_t __ovld intel_sub_group_avc_mce_convert_to_ref_payload( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_payload_t __ovld intel_sub_group_avc_sic_convert_to_mce_payload( intel_sub_group_avc_sic_payload_t payload); intel_sub_group_avc_sic_payload_t __ovld intel_sub_group_avc_mce_convert_to_sic_payload( intel_sub_group_avc_mce_payload_t payload); intel_sub_group_avc_mce_result_t __ovld intel_sub_group_avc_ime_convert_to_mce_result( intel_sub_group_avc_ime_result_t result); intel_sub_group_avc_ime_result_t __ovld intel_sub_group_avc_mce_convert_to_ime_result( intel_sub_group_avc_mce_result_t result); intel_sub_group_avc_mce_result_t __ovld intel_sub_group_avc_ref_convert_to_mce_result( intel_sub_group_avc_ref_result_t result); intel_sub_group_avc_ref_result_t __ovld intel_sub_group_avc_mce_convert_to_ref_result( intel_sub_group_avc_mce_result_t result); intel_sub_group_avc_mce_result_t __ovld intel_sub_group_avc_sic_convert_to_mce_result( intel_sub_group_avc_sic_result_t result); intel_sub_group_avc_sic_result_t __ovld intel_sub_group_avc_mce_convert_to_sic_result( intel_sub_group_avc_mce_result_t result); 
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end #endif // cl_intel_device_side_avc_motion_estimation #ifdef cl_amd_media_ops uint __ovld amd_bitalign(uint, uint, uint); uint2 __ovld amd_bitalign(uint2, uint2, uint2); uint3 __ovld amd_bitalign(uint3, uint3, uint3); uint4 __ovld amd_bitalign(uint4, uint4, uint4); uint8 __ovld amd_bitalign(uint8, uint8, uint8); uint16 __ovld amd_bitalign(uint16, uint16, uint16); uint __ovld amd_bytealign(uint, uint, uint); uint2 __ovld amd_bytealign(uint2, uint2, uint2); uint3 __ovld amd_bytealign(uint3, uint3, uint3); uint4 __ovld amd_bytealign(uint4, uint4, uint4); uint8 __ovld amd_bytealign(uint8, uint8, uint8); uint16 __ovld amd_bytealign(uint16, uint16, uint16); uint __ovld amd_lerp(uint, uint, uint); uint2 __ovld amd_lerp(uint2, uint2, uint2); uint3 __ovld amd_lerp(uint3, uint3, uint3); uint4 __ovld amd_lerp(uint4, uint4, uint4); uint8 __ovld amd_lerp(uint8, uint8, uint8); uint16 __ovld amd_lerp(uint16, uint16, uint16); uint __ovld amd_pack(float4 v); uint __ovld amd_sad4(uint4, uint4, uint); uint __ovld amd_sadhi(uint, uint, uint); uint2 __ovld amd_sadhi(uint2, uint2, uint2); uint3 __ovld amd_sadhi(uint3, uint3, uint3); uint4 __ovld amd_sadhi(uint4, uint4, uint4); uint8 __ovld amd_sadhi(uint8, uint8, uint8); uint16 __ovld amd_sadhi(uint16, uint16, uint16); uint __ovld amd_sad(uint, uint, uint); uint2 __ovld amd_sad(uint2, uint2, uint2); uint3 __ovld amd_sad(uint3, uint3, uint3); uint4 __ovld amd_sad(uint4, uint4, uint4); uint8 __ovld amd_sad(uint8, uint8, uint8); uint16 __ovld amd_sad(uint16, uint16, uint16); float __ovld amd_unpack0(uint); float2 __ovld amd_unpack0(uint2); float3 __ovld amd_unpack0(uint3); float4 __ovld amd_unpack0(uint4); float8 __ovld amd_unpack0(uint8); float16 __ovld amd_unpack0(uint16); float __ovld amd_unpack1(uint); float2 __ovld amd_unpack1(uint2); float3 __ovld amd_unpack1(uint3); float4 __ovld amd_unpack1(uint4); float8 __ovld amd_unpack1(uint8); float16 __ovld 
amd_unpack1(uint16); float __ovld amd_unpack2(uint); float2 __ovld amd_unpack2(uint2); float3 __ovld amd_unpack2(uint3); float4 __ovld amd_unpack2(uint4); float8 __ovld amd_unpack2(uint8); float16 __ovld amd_unpack2(uint16); float __ovld amd_unpack3(uint); float2 __ovld amd_unpack3(uint2); float3 __ovld amd_unpack3(uint3); float4 __ovld amd_unpack3(uint4); float8 __ovld amd_unpack3(uint8); float16 __ovld amd_unpack3(uint16); #endif // cl_amd_media_ops #ifdef cl_amd_media_ops2 int __ovld amd_bfe(int src0, uint src1, uint src2); int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2); int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2); int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2); int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2); int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2); uint __ovld amd_bfe(uint src0, uint src1, uint src2); uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2); uint __ovld amd_bfm(uint src0, uint src1); uint2 __ovld amd_bfm(uint2 src0, uint2 src1); uint3 __ovld amd_bfm(uint3 src0, uint3 src1); uint4 __ovld amd_bfm(uint4 src0, uint4 src1); uint8 __ovld amd_bfm(uint8 src0, uint8 src1); uint16 __ovld amd_bfm(uint16 src0, uint16 src1); float __ovld amd_max3(float src0, float src1, float src2); float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2); float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2); float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2); float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2); float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2); int __ovld amd_max3(int src0, int src1, int src2); int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2); int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2); int4 __ovld 
amd_max3(int4 src0, int4 src1, int4 src2); int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2); int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2); uint __ovld amd_max3(uint src0, uint src1, uint src2); uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2); float __ovld amd_median3(float src0, float src1, float src2); float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2); float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2); float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2); float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2); float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2); int __ovld amd_median3(int src0, int src1, int src2); int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2); int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2); int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2); int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2); int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2); uint __ovld amd_median3(uint src0, uint src1, uint src2); uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2); float __ovld amd_min3(float src0, float src1, float src); float2 __ovld amd_min3(float2 src0, float2 src1, float2 src); float3 __ovld amd_min3(float3 src0, float3 src1, float3 src); float4 __ovld amd_min3(float4 src0, float4 src1, float4 src); float8 __ovld amd_min3(float8 src0, float8 src1, float8 src); float16 __ovld amd_min3(float16 src0, float16 src1, float16 src); 
int __ovld amd_min3(int src0, int src1, int src2); int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2); int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2); int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2); int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2); int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2); uint __ovld amd_min3(uint src0, uint src1, uint src2); uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2); ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2); ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2); ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2); ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2); ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2); ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2); ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2); ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2); ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2); ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2); ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2); ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2); uint __ovld amd_msad(uint src0, uint src1, uint src2); uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2); uint __ovld amd_sadd(uint src0, uint src1, uint src2); uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_sadd(uint4 src0, 
uint4 src1, uint4 src2); uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2); uint __ovld amd_sadw(uint src0, uint src1, uint src2); uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2); uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2); uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2); uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2); #endif // cl_amd_media_ops2 #if defined(cl_arm_integer_dot_product_int8) uint __ovld arm_dot(uchar4, uchar4); int __ovld arm_dot(char4, char4); #endif // defined(cl_arm_integer_dot_product_int8) #if defined(cl_arm_integer_dot_product_accumulate_int8) uint __ovld arm_dot_acc(uchar4, uchar4, uint); int __ovld arm_dot_acc(char4, char4, int); #endif // defined(cl_arm_integer_dot_product_accumulate_int8) #if defined(cl_arm_integer_dot_product_accumulate_int16) uint __ovld arm_dot_acc(ushort2, ushort2, uint); int __ovld arm_dot_acc(short2, short2, int); #endif // defined(cl_arm_integer_dot_product_accumulate_int16) #if defined(cl_arm_integer_dot_product_accumulate_saturate_int8) uint __ovld arm_dot_acc_sat(uchar4, uchar4, uint); int __ovld arm_dot_acc_sat(char4, char4, int); #endif // defined(cl_arm_integer_dot_product_accumulate_saturate_int8) // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable #undef __opencl_c_named_address_space_builtins #undef __cnfn #undef __ovld #endif //_OPENCL_H_ /*===---- pconfigintrin.h - X86 platform configuration ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __PCONFIGINTRIN_H #define __PCONFIGINTRIN_H #define __PCONFIG_KEY_PROGRAM 0x00000001 #if __has_extension(gnu_asm) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("pconfig"))) static __inline unsigned int __DEFAULT_FN_ATTRS _pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("pconfig" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } #undef __DEFAULT_FN_ATTRS #endif /* __has_extension(gnu_asm) */ #endif /*===---- pkuintrin.h - PKU intrinsics -------------------------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __PKUINTRIN_H #define __PKUINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("pku"))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdpkru_u32(void) { return __builtin_ia32_rdpkru(); } static __inline__ void __DEFAULT_FN_ATTRS _wrpkru(unsigned int __val) { __builtin_ia32_wrpkru(__val); } #undef __DEFAULT_FN_ATTRS #endif /*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __PMMINTRIN_H #define __PMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128))) /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. /// /// If the address of the data is not 16-byte aligned, the instruction may /// read two adjacent aligned blocks of memory to retrieve the requested /// data. /// /// \headerfile /// /// This intrinsic corresponds to the VLDDQU instruction. /// /// \param __p /// A pointer to a 128-bit integer vector containing integer values. /// \returns A 128-bit vector containing the moved values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i_u const *__p) { return (__m128i)__builtin_ia32_lddqu((char const *)__p); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float] containing the left source operand. /// \param __b /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } /// Horizontally adds the adjacent pairs of values contained in two /// 128-bit vectors of [4 x float]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VHADDPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } /// Horizontally subtracts the adjacent pairs of values contained in two /// 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal differences between the values are stored in the lower /// bits of the destination. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The horizontal differences between the values are stored in the upper /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } /// Moves and duplicates odd-indexed values from a 128-bit vector /// of [4 x float] to float values stored in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSHDUP instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. \n /// Bits [127:96] of the source are written to bits [127:96] and [95:64] of /// the destination. \n /// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the /// destination. 
/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); } /// Duplicates even-indexed values from a 128-bit vector of /// [4 x float] to float values stored in a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSLDUP instruction. /// /// \param __a /// A 128-bit vector of [4 x float] \n /// Bits [95:64] of the source are written to bits [127:96] and [95:64] of /// the destination. \n /// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); } /// Adds the even-indexed values and subtracts the odd-indexed values of /// two 128-bit vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSUBPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing the left source operand. /// \param __b /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } /// Horizontally adds the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHADDPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal sum of the values is stored in the lower bits of the /// destination. 
/// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal sum of the values is stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } /// Horizontally subtracts the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile /// /// This intrinsic corresponds to the VHSUBPD instruction. /// /// \param __a /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal difference of the values is stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal difference of the values is stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } /// Moves and duplicates one double-precision value to double-precision /// values stored in a 128-bit vector of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_loaddup_pd(double const *dp); /// \endcode /// /// This intrinsic corresponds to the VMOVDDUP instruction. /// /// \param dp /// A pointer to a double-precision value to be moved and duplicated. /// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. #define _mm_loaddup_pd(dp) _mm_load1_pd(dp) /// Moves and duplicates the double-precision value in the lower bits of /// a 128-bit vector of [2 x double] to double-precision values stored in a /// 128-bit vector of [2 x double]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VMOVDDUP instruction. /// /// \param __a /// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits /// [127:64] and [63:0] of the destination. /// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } /// Establishes a linear address memory range to be monitored and puts /// the processor in the monitor event pending state. Data stored in the /// monitored address range causes the processor to exit the pending state. /// /// The \c MONITOR instruction can be used in kernel mode, and in other modes /// if MSR C001_0015h[MonMwaitUserEn] is set. /// /// \headerfile /// /// This intrinsic corresponds to the \c MONITOR instruction. /// /// \param __p /// The memory range to be monitored. The size of the range is determined by /// CPUID function 0000_0005h. /// \param __extensions /// Optional extensions for the monitoring state. /// \param __hints /// Optional hints for the monitoring state. static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) { __builtin_ia32_monitor(__p, __extensions, __hints); } /// Used with the \c MONITOR instruction to wait while the processor is in /// the monitor event pending state. Data stored in the monitored address /// range, or an interrupt, causes the processor to exit the pending state. /// /// The \c MWAIT instruction can be used in kernel mode, and in other modes if /// MSR C001_0015h[MonMwaitUserEn] is set. /// /// \headerfile /// /// This intrinsic corresponds to the \c MWAIT instruction. /// /// \param __extensions /// Optional extensions for the monitoring state, which can vary by /// processor. /// \param __hints /// Optional hints for the monitoring state, which can vary by processor. 
static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints) { __builtin_ia32_mwait(__extensions, __hints); } #undef __DEFAULT_FN_ATTRS #endif /* __PMMINTRIN_H */ /*===---- popcntintrin.h - POPCNT intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __POPCNTINTRIN_H #define __POPCNTINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr #else #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// This intrinsic corresponds to the POPCNT instruction. /// /// \param __A /// An unsigned 32-bit integer operand. /// \returns A 32-bit integer containing the number of bits with value 1 in the /// source operand. static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_popcnt_u32(unsigned int __A) { return __builtin_popcount(__A); } #ifdef __x86_64__ /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile /// /// This intrinsic corresponds to the POPCNT instruction. /// /// \param __A /// An unsigned 64-bit integer operand. /// \returns A 64-bit integer containing the number of bits with value 1 in the /// source operand. 
static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __POPCNTINTRIN_H */ /*===---- prfchiintrin.h - PREFETCHI intrinsic -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __PRFCHIINTRIN_H #define __PRFCHIINTRIN_H #ifdef __x86_64__ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("prefetchi"))) /// Loads an instruction sequence containing the specified memory address into /// all level cache. /// /// Note that the effect of this intrinsic is dependent on the processor /// implementation. /// /// \headerfile /// /// This intrinsic corresponds to the \c PREFETCHIT0 instruction. /// /// \param __P /// A pointer specifying the memory address to be prefetched. static __inline__ void __DEFAULT_FN_ATTRS _m_prefetchit0(volatile const void *__P) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" __builtin_ia32_prefetchi((const void *)__P, 3 /* _MM_HINT_T0 */); #pragma clang diagnostic pop } /// Loads an instruction sequence containing the specified memory address into /// all but the first-level cache. /// /// Note that the effect of this intrinsic is dependent on the processor /// implementation. /// /// \headerfile /// /// This intrinsic corresponds to the \c PREFETCHIT1 instruction. /// /// \param __P /// A pointer specifying the memory address to be prefetched. 
static __inline__ void __DEFAULT_FN_ATTRS _m_prefetchit1(volatile const void *__P) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" __builtin_ia32_prefetchi((const void *)__P, 2 /* _MM_HINT_T1 */); #pragma clang diagnostic pop } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __PRFCHWINTRIN_H */ /*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED) #error "Never use directly; include or instead." #endif #ifndef __PRFCHWINTRIN_H #define __PRFCHWINTRIN_H /// Loads a memory sequence containing the specified memory address into /// all data cache levels. The cache-coherency state is set to exclusive. /// Data can be read from and written to the cache line without additional /// delay. /// /// \headerfile /// /// This intrinsic corresponds to the \c PREFETCHT0 instruction. /// /// \param __P /// A pointer specifying the memory address to be prefetched. static __inline__ void __attribute__((__always_inline__, __nodebug__)) _m_prefetch(void *__P) { __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */); } /// Loads a memory sequence containing the specified memory address into /// the L1 data cache and sets the cache-coherency to modified. This /// provides a hint to the processor that the cache line will be modified. /// It is intended for use when the cache line will be written to shortly /// after the prefetch is performed. /// /// Note that the effect of this intrinsic is dependent on the processor /// implementation. /// /// \headerfile /// /// This intrinsic corresponds to the \c PREFETCHW instruction. 
/// /// \param __P /// A pointer specifying the memory address to be prefetched. static __inline__ void __attribute__((__always_inline__, __nodebug__)) _m_prefetchw(volatile const void *__P) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" __builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */); #pragma clang diagnostic pop } #endif /* __PRFCHWINTRIN_H */ /*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __PTWRITEINTRIN_H #define __PTWRITEINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("ptwrite"))) static __inline__ void __DEFAULT_FN_ATTRS _ptwrite32(unsigned int __value) { __builtin_ia32_ptwrite32(__value); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _ptwrite64(unsigned long long __value) { __builtin_ia32_ptwrite64(__value); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __PTWRITEINTRIN_H */ /*===----------------------- raointintrin.h - RAOINT ------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." 
#endif // __X86GPRINTRIN_H #ifndef __RAOINTINTRIN_H #define __RAOINTINTRIN_H #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("raoint"))) /// Atomically add a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AADD instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] + __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aadd_i32(int *__A, int __B) { __builtin_ia32_aadd32((int *)__A, __B); } /// Atomically and a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AAND instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] AND __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aand_i32(int *__A, int __B) { __builtin_ia32_aand32((int *)__A, __B); } /// Atomically or a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AOR instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. 
/// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] OR __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aor_i32(int *__A, int __B) { __builtin_ia32_aor32((int *)__A, __B); } /// Atomically xor a 32-bit value at memory operand \a __A and a 32-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AXOR instruction. /// /// \param __A /// A pointer to a 32-bit memory location. /// \param __B /// A 32-bit integer value. /// /// \code{.operation} /// MEM[__A+31:__A] := MEM[__A+31:__A] XOR __B[31:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _axor_i32(int *__A, int __B) { __builtin_ia32_axor32((int *)__A, __B); } #ifdef __x86_64__ /// Atomically add a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AADD instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] + __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aadd_i64(long long *__A, long long __B) { __builtin_ia32_aadd64((long long *)__A, __B); } /// Atomically and a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AAND instruction. 
/// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] AND __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aand_i64(long long *__A, long long __B) { __builtin_ia32_aand64((long long *)__A, __B); } /// Atomically or a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AOR instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] OR __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _aor_i64(long long *__A, long long __B) { __builtin_ia32_aor64((long long *)__A, __B); } /// Atomically xor a 64-bit value at memory operand \a __A and a 64-bit \a __B, /// and store the result to the same memory location. /// /// This intrinsic should be used for contention or weak ordering. It may /// result in bad performance for hot data used by single thread only. /// /// \headerfile /// /// This intrinsic corresponds to the \c AXOR instruction. /// /// \param __A /// A pointer to a 64-bit memory location. /// \param __B /// A 64-bit integer value. /// /// \code{.operation} /// MEM[__A+63:__A] := MEM[__A+63:__A] XOR __B[63:0] /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _axor_i64(long long *__A, long long __B) { __builtin_ia32_axor64((long long *)__A, __B); } #endif // __x86_64__ #undef __DEFAULT_FN_ATTRS #endif // __RAOINTINTRIN_H /*===---- rdpruintrin.h - RDPRU intrinsics ---------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __RDPRUINTRIN_H #define __RDPRUINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("rdpru"))) /// Reads the content of a processor register. /// /// \headerfile /// /// This intrinsic corresponds to the RDPRU instruction. /// /// \param reg_id /// A processor register identifier. static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdpru (int reg_id) { return __builtin_ia32_rdpru(reg_id); } #define __RDPRU_MPERF 0 #define __RDPRU_APERF 1 /// Reads the content of processor register MPERF. /// /// \headerfile /// /// This intrinsic generates instruction RDPRU to read the value of /// register MPERF. #define __mperf() __builtin_ia32_rdpru(__RDPRU_MPERF) /// Reads the content of processor register APERF. /// /// \headerfile /// /// This intrinsic generates instruction RDPRU to read the value of /// register APERF. #define __aperf() __builtin_ia32_rdpru(__RDPRU_APERF) #undef __DEFAULT_FN_ATTRS #endif /* __RDPRUINTRIN_H */ /*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __RDSEEDINTRIN_H #define __RDSEEDINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed"))) /// Stores a hardware-generated 16-bit random value in the memory at \a __p. /// /// The random number generator complies with NIST SP800-90B and SP800-90C. /// /// \code{.operation} /// IF HW_NRND_GEN.ready == 1 /// Store16(__p, HW_NRND_GEN.data) /// result := 1 /// ELSE /// Store16(__p, 0) /// result := 0 /// END /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c RDSEED instruction. /// /// \param __p /// Pointer to memory for storing the 16-bit random number. /// \returns 1 if a random number was generated, 0 if not. static __inline__ int __DEFAULT_FN_ATTRS _rdseed16_step(unsigned short *__p) { return (int) __builtin_ia32_rdseed16_step(__p); } /// Stores a hardware-generated 32-bit random value in the memory at \a __p. /// /// The random number generator complies with NIST SP800-90B and SP800-90C. /// /// \code{.operation} /// IF HW_NRND_GEN.ready == 1 /// Store32(__p, HW_NRND_GEN.data) /// result := 1 /// ELSE /// Store32(__p, 0) /// result := 0 /// END /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c RDSEED instruction. /// /// \param __p /// Pointer to memory for storing the 32-bit random number. /// \returns 1 if a random number was generated, 0 if not. static __inline__ int __DEFAULT_FN_ATTRS _rdseed32_step(unsigned int *__p) { return (int) __builtin_ia32_rdseed32_step(__p); } #ifdef __x86_64__ /// Stores a hardware-generated 64-bit random value in the memory at \a __p. /// /// The random number generator complies with NIST SP800-90B and SP800-90C. /// /// \code{.operation} /// IF HW_NRND_GEN.ready == 1 /// Store64(__p, HW_NRND_GEN.data) /// result := 1 /// ELSE /// Store64(__p, 0) /// result := 0 /// END /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c RDSEED instruction. /// /// \param __p /// Pointer to memory for storing the 64-bit random number. 
/// \returns 1 if a random number was generated, 0 if not. static __inline__ int __DEFAULT_FN_ATTRS _rdseed64_step(unsigned long long *__p) { return (int) __builtin_ia32_rdseed64_step(__p); } #endif #undef __DEFAULT_FN_ATTRS #endif /* __RDSEEDINTRIN_H */ /*===---- riscv_ntlh.h - RISC-V NTLH intrinsics ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __RISCV_NTLH_H #define __RISCV_NTLH_H #ifndef __riscv_zihintntl #error "NTLH intrinsics require the NTLH extension." #endif enum { __RISCV_NTLH_INNERMOST_PRIVATE = 2, __RISCV_NTLH_ALL_PRIVATE, __RISCV_NTLH_INNERMOST_SHARED, __RISCV_NTLH_ALL }; #define __riscv_ntl_load(PTR, DOMAIN) __builtin_riscv_ntl_load((PTR), (DOMAIN)) #define __riscv_ntl_store(PTR, VAL, DOMAIN) \ __builtin_riscv_ntl_store((PTR), (VAL), (DOMAIN)) #endif/*===---- riscv_vector.h - RISC-V V-extension RVVIntrinsics -------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __RISCV_VECTOR_H #define __RISCV_VECTOR_H #include #include #ifndef __riscv_vector #error "Vector intrinsics require the vector extension." 
#endif #ifdef __cplusplus extern "C" { #endif #pragma clang riscv intrinsic vector enum __RISCV_FRM { __RISCV_FRM_RNE = 0, __RISCV_FRM_RTZ = 1, __RISCV_FRM_RDN = 2, __RISCV_FRM_RUP = 3, __RISCV_FRM_RMM = 4, }; #define __riscv_vlenb() __builtin_rvv_vlenb() #define __riscv_vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6) #define __riscv_vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7) #define __riscv_vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0) #define __riscv_vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1) #define __riscv_vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2) #define __riscv_vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3) #define __riscv_vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7) #define __riscv_vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0) #define __riscv_vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1) #define __riscv_vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2) #define __riscv_vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3) #define __riscv_vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0) #define __riscv_vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1) #define __riscv_vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2) #define __riscv_vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3) #if __riscv_v_elen >= 64 #define __riscv_vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5) #define __riscv_vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6) #define __riscv_vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7) #define __riscv_vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0) #define __riscv_vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1) #define __riscv_vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2) #define __riscv_vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3) #endif #define 
__riscv_vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6) #define __riscv_vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7) #define __riscv_vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0) #define __riscv_vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1) #define __riscv_vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2) #define __riscv_vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3) #define __riscv_vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7) #define __riscv_vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0) #define __riscv_vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1) #define __riscv_vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2) #define __riscv_vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3) #define __riscv_vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0) #define __riscv_vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1) #define __riscv_vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2) #define __riscv_vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3) #if __riscv_v_elen >= 64 #define __riscv_vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5) #define __riscv_vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6) #define __riscv_vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7) #define __riscv_vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0) #define __riscv_vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1) #define __riscv_vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2) #define __riscv_vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3) #endif enum __RISCV_VXRM { __RISCV_VXRM_RNU = 0, __RISCV_VXRM_RNE = 1, __RISCV_VXRM_RDN = 2, __RISCV_VXRM_ROD = 3, }; typedef __rvv_bool64_t vbool64_t; typedef __rvv_bool32_t vbool32_t; typedef __rvv_bool16_t vbool16_t; typedef __rvv_bool8_t vbool8_t; typedef __rvv_bool4_t vbool4_t; typedef __rvv_bool2_t vbool2_t; typedef __rvv_bool1_t vbool1_t; typedef __rvv_int8mf8_t vint8mf8_t; typedef __rvv_uint8mf8_t vuint8mf8_t; typedef __rvv_int8mf8x2_t vint8mf8x2_t; typedef __rvv_uint8mf8x2_t vuint8mf8x2_t; typedef __rvv_int8mf8x3_t vint8mf8x3_t; typedef 
__rvv_uint8mf8x3_t vuint8mf8x3_t; typedef __rvv_int8mf8x4_t vint8mf8x4_t; typedef __rvv_uint8mf8x4_t vuint8mf8x4_t; typedef __rvv_int8mf8x5_t vint8mf8x5_t; typedef __rvv_uint8mf8x5_t vuint8mf8x5_t; typedef __rvv_int8mf8x6_t vint8mf8x6_t; typedef __rvv_uint8mf8x6_t vuint8mf8x6_t; typedef __rvv_int8mf8x7_t vint8mf8x7_t; typedef __rvv_uint8mf8x7_t vuint8mf8x7_t; typedef __rvv_int8mf8x8_t vint8mf8x8_t; typedef __rvv_uint8mf8x8_t vuint8mf8x8_t; typedef __rvv_int8mf4_t vint8mf4_t; typedef __rvv_uint8mf4_t vuint8mf4_t; typedef __rvv_int8mf4x2_t vint8mf4x2_t; typedef __rvv_uint8mf4x2_t vuint8mf4x2_t; typedef __rvv_int8mf4x3_t vint8mf4x3_t; typedef __rvv_uint8mf4x3_t vuint8mf4x3_t; typedef __rvv_int8mf4x4_t vint8mf4x4_t; typedef __rvv_uint8mf4x4_t vuint8mf4x4_t; typedef __rvv_int8mf4x5_t vint8mf4x5_t; typedef __rvv_uint8mf4x5_t vuint8mf4x5_t; typedef __rvv_int8mf4x6_t vint8mf4x6_t; typedef __rvv_uint8mf4x6_t vuint8mf4x6_t; typedef __rvv_int8mf4x7_t vint8mf4x7_t; typedef __rvv_uint8mf4x7_t vuint8mf4x7_t; typedef __rvv_int8mf4x8_t vint8mf4x8_t; typedef __rvv_uint8mf4x8_t vuint8mf4x8_t; typedef __rvv_int8mf2_t vint8mf2_t; typedef __rvv_uint8mf2_t vuint8mf2_t; typedef __rvv_int8mf2x2_t vint8mf2x2_t; typedef __rvv_uint8mf2x2_t vuint8mf2x2_t; typedef __rvv_int8mf2x3_t vint8mf2x3_t; typedef __rvv_uint8mf2x3_t vuint8mf2x3_t; typedef __rvv_int8mf2x4_t vint8mf2x4_t; typedef __rvv_uint8mf2x4_t vuint8mf2x4_t; typedef __rvv_int8mf2x5_t vint8mf2x5_t; typedef __rvv_uint8mf2x5_t vuint8mf2x5_t; typedef __rvv_int8mf2x6_t vint8mf2x6_t; typedef __rvv_uint8mf2x6_t vuint8mf2x6_t; typedef __rvv_int8mf2x7_t vint8mf2x7_t; typedef __rvv_uint8mf2x7_t vuint8mf2x7_t; typedef __rvv_int8mf2x8_t vint8mf2x8_t; typedef __rvv_uint8mf2x8_t vuint8mf2x8_t; typedef __rvv_int8m1_t vint8m1_t; typedef __rvv_uint8m1_t vuint8m1_t; typedef __rvv_int8m1x2_t vint8m1x2_t; typedef __rvv_uint8m1x2_t vuint8m1x2_t; typedef __rvv_int8m1x3_t vint8m1x3_t; typedef __rvv_uint8m1x3_t vuint8m1x3_t; typedef __rvv_int8m1x4_t 
vint8m1x4_t; typedef __rvv_uint8m1x4_t vuint8m1x4_t; typedef __rvv_int8m1x5_t vint8m1x5_t; typedef __rvv_uint8m1x5_t vuint8m1x5_t; typedef __rvv_int8m1x6_t vint8m1x6_t; typedef __rvv_uint8m1x6_t vuint8m1x6_t; typedef __rvv_int8m1x7_t vint8m1x7_t; typedef __rvv_uint8m1x7_t vuint8m1x7_t; typedef __rvv_int8m1x8_t vint8m1x8_t; typedef __rvv_uint8m1x8_t vuint8m1x8_t; typedef __rvv_int8m2_t vint8m2_t; typedef __rvv_uint8m2_t vuint8m2_t; typedef __rvv_int8m2x2_t vint8m2x2_t; typedef __rvv_uint8m2x2_t vuint8m2x2_t; typedef __rvv_int8m2x3_t vint8m2x3_t; typedef __rvv_uint8m2x3_t vuint8m2x3_t; typedef __rvv_int8m2x4_t vint8m2x4_t; typedef __rvv_uint8m2x4_t vuint8m2x4_t; typedef __rvv_int8m4_t vint8m4_t; typedef __rvv_uint8m4_t vuint8m4_t; typedef __rvv_int8m4x2_t vint8m4x2_t; typedef __rvv_uint8m4x2_t vuint8m4x2_t; typedef __rvv_int8m8_t vint8m8_t; typedef __rvv_uint8m8_t vuint8m8_t; typedef __rvv_int16mf4_t vint16mf4_t; typedef __rvv_uint16mf4_t vuint16mf4_t; typedef __rvv_int16mf4x2_t vint16mf4x2_t; typedef __rvv_uint16mf4x2_t vuint16mf4x2_t; typedef __rvv_int16mf4x3_t vint16mf4x3_t; typedef __rvv_uint16mf4x3_t vuint16mf4x3_t; typedef __rvv_int16mf4x4_t vint16mf4x4_t; typedef __rvv_uint16mf4x4_t vuint16mf4x4_t; typedef __rvv_int16mf4x5_t vint16mf4x5_t; typedef __rvv_uint16mf4x5_t vuint16mf4x5_t; typedef __rvv_int16mf4x6_t vint16mf4x6_t; typedef __rvv_uint16mf4x6_t vuint16mf4x6_t; typedef __rvv_int16mf4x7_t vint16mf4x7_t; typedef __rvv_uint16mf4x7_t vuint16mf4x7_t; typedef __rvv_int16mf4x8_t vint16mf4x8_t; typedef __rvv_uint16mf4x8_t vuint16mf4x8_t; typedef __rvv_int16mf2_t vint16mf2_t; typedef __rvv_uint16mf2_t vuint16mf2_t; typedef __rvv_int16mf2x2_t vint16mf2x2_t; typedef __rvv_uint16mf2x2_t vuint16mf2x2_t; typedef __rvv_int16mf2x3_t vint16mf2x3_t; typedef __rvv_uint16mf2x3_t vuint16mf2x3_t; typedef __rvv_int16mf2x4_t vint16mf2x4_t; typedef __rvv_uint16mf2x4_t vuint16mf2x4_t; typedef __rvv_int16mf2x5_t vint16mf2x5_t; typedef __rvv_uint16mf2x5_t vuint16mf2x5_t; typedef 
__rvv_int16mf2x6_t vint16mf2x6_t; typedef __rvv_uint16mf2x6_t vuint16mf2x6_t; typedef __rvv_int16mf2x7_t vint16mf2x7_t; typedef __rvv_uint16mf2x7_t vuint16mf2x7_t; typedef __rvv_int16mf2x8_t vint16mf2x8_t; typedef __rvv_uint16mf2x8_t vuint16mf2x8_t; typedef __rvv_int16m1_t vint16m1_t; typedef __rvv_uint16m1_t vuint16m1_t; typedef __rvv_int16m1x2_t vint16m1x2_t; typedef __rvv_uint16m1x2_t vuint16m1x2_t; typedef __rvv_int16m1x3_t vint16m1x3_t; typedef __rvv_uint16m1x3_t vuint16m1x3_t; typedef __rvv_int16m1x4_t vint16m1x4_t; typedef __rvv_uint16m1x4_t vuint16m1x4_t; typedef __rvv_int16m1x5_t vint16m1x5_t; typedef __rvv_uint16m1x5_t vuint16m1x5_t; typedef __rvv_int16m1x6_t vint16m1x6_t; typedef __rvv_uint16m1x6_t vuint16m1x6_t; typedef __rvv_int16m1x7_t vint16m1x7_t; typedef __rvv_uint16m1x7_t vuint16m1x7_t; typedef __rvv_int16m1x8_t vint16m1x8_t; typedef __rvv_uint16m1x8_t vuint16m1x8_t; typedef __rvv_int16m2_t vint16m2_t; typedef __rvv_uint16m2_t vuint16m2_t; typedef __rvv_int16m2x2_t vint16m2x2_t; typedef __rvv_uint16m2x2_t vuint16m2x2_t; typedef __rvv_int16m2x3_t vint16m2x3_t; typedef __rvv_uint16m2x3_t vuint16m2x3_t; typedef __rvv_int16m2x4_t vint16m2x4_t; typedef __rvv_uint16m2x4_t vuint16m2x4_t; typedef __rvv_int16m4_t vint16m4_t; typedef __rvv_uint16m4_t vuint16m4_t; typedef __rvv_int16m4x2_t vint16m4x2_t; typedef __rvv_uint16m4x2_t vuint16m4x2_t; typedef __rvv_int16m8_t vint16m8_t; typedef __rvv_uint16m8_t vuint16m8_t; typedef __rvv_int32mf2_t vint32mf2_t; typedef __rvv_uint32mf2_t vuint32mf2_t; typedef __rvv_int32mf2x2_t vint32mf2x2_t; typedef __rvv_uint32mf2x2_t vuint32mf2x2_t; typedef __rvv_int32mf2x3_t vint32mf2x3_t; typedef __rvv_uint32mf2x3_t vuint32mf2x3_t; typedef __rvv_int32mf2x4_t vint32mf2x4_t; typedef __rvv_uint32mf2x4_t vuint32mf2x4_t; typedef __rvv_int32mf2x5_t vint32mf2x5_t; typedef __rvv_uint32mf2x5_t vuint32mf2x5_t; typedef __rvv_int32mf2x6_t vint32mf2x6_t; typedef __rvv_uint32mf2x6_t vuint32mf2x6_t; typedef __rvv_int32mf2x7_t vint32mf2x7_t; 
typedef __rvv_uint32mf2x7_t vuint32mf2x7_t; typedef __rvv_int32mf2x8_t vint32mf2x8_t; typedef __rvv_uint32mf2x8_t vuint32mf2x8_t; typedef __rvv_int32m1_t vint32m1_t; typedef __rvv_uint32m1_t vuint32m1_t; typedef __rvv_int32m1x2_t vint32m1x2_t; typedef __rvv_uint32m1x2_t vuint32m1x2_t; typedef __rvv_int32m1x3_t vint32m1x3_t; typedef __rvv_uint32m1x3_t vuint32m1x3_t; typedef __rvv_int32m1x4_t vint32m1x4_t; typedef __rvv_uint32m1x4_t vuint32m1x4_t; typedef __rvv_int32m1x5_t vint32m1x5_t; typedef __rvv_uint32m1x5_t vuint32m1x5_t; typedef __rvv_int32m1x6_t vint32m1x6_t; typedef __rvv_uint32m1x6_t vuint32m1x6_t; typedef __rvv_int32m1x7_t vint32m1x7_t; typedef __rvv_uint32m1x7_t vuint32m1x7_t; typedef __rvv_int32m1x8_t vint32m1x8_t; typedef __rvv_uint32m1x8_t vuint32m1x8_t; typedef __rvv_int32m2_t vint32m2_t; typedef __rvv_uint32m2_t vuint32m2_t; typedef __rvv_int32m2x2_t vint32m2x2_t; typedef __rvv_uint32m2x2_t vuint32m2x2_t; typedef __rvv_int32m2x3_t vint32m2x3_t; typedef __rvv_uint32m2x3_t vuint32m2x3_t; typedef __rvv_int32m2x4_t vint32m2x4_t; typedef __rvv_uint32m2x4_t vuint32m2x4_t; typedef __rvv_int32m4_t vint32m4_t; typedef __rvv_uint32m4_t vuint32m4_t; typedef __rvv_int32m4x2_t vint32m4x2_t; typedef __rvv_uint32m4x2_t vuint32m4x2_t; typedef __rvv_int32m8_t vint32m8_t; typedef __rvv_uint32m8_t vuint32m8_t; typedef __rvv_int64m1_t vint64m1_t; typedef __rvv_uint64m1_t vuint64m1_t; typedef __rvv_int64m1x2_t vint64m1x2_t; typedef __rvv_uint64m1x2_t vuint64m1x2_t; typedef __rvv_int64m1x3_t vint64m1x3_t; typedef __rvv_uint64m1x3_t vuint64m1x3_t; typedef __rvv_int64m1x4_t vint64m1x4_t; typedef __rvv_uint64m1x4_t vuint64m1x4_t; typedef __rvv_int64m1x5_t vint64m1x5_t; typedef __rvv_uint64m1x5_t vuint64m1x5_t; typedef __rvv_int64m1x6_t vint64m1x6_t; typedef __rvv_uint64m1x6_t vuint64m1x6_t; typedef __rvv_int64m1x7_t vint64m1x7_t; typedef __rvv_uint64m1x7_t vuint64m1x7_t; typedef __rvv_int64m1x8_t vint64m1x8_t; typedef __rvv_uint64m1x8_t vuint64m1x8_t; typedef __rvv_int64m2_t 
vint64m2_t; typedef __rvv_uint64m2_t vuint64m2_t; typedef __rvv_int64m2x2_t vint64m2x2_t; typedef __rvv_uint64m2x2_t vuint64m2x2_t; typedef __rvv_int64m2x3_t vint64m2x3_t; typedef __rvv_uint64m2x3_t vuint64m2x3_t; typedef __rvv_int64m2x4_t vint64m2x4_t; typedef __rvv_uint64m2x4_t vuint64m2x4_t; typedef __rvv_int64m4_t vint64m4_t; typedef __rvv_uint64m4_t vuint64m4_t; typedef __rvv_int64m4x2_t vint64m4x2_t; typedef __rvv_uint64m4x2_t vuint64m4x2_t; typedef __rvv_int64m8_t vint64m8_t; typedef __rvv_uint64m8_t vuint64m8_t; typedef __rvv_float16mf4_t vfloat16mf4_t; typedef __rvv_float16mf4x2_t vfloat16mf4x2_t; typedef __rvv_float16mf4x3_t vfloat16mf4x3_t; typedef __rvv_float16mf4x4_t vfloat16mf4x4_t; typedef __rvv_float16mf4x5_t vfloat16mf4x5_t; typedef __rvv_float16mf4x6_t vfloat16mf4x6_t; typedef __rvv_float16mf4x7_t vfloat16mf4x7_t; typedef __rvv_float16mf4x8_t vfloat16mf4x8_t; typedef __rvv_float16mf2_t vfloat16mf2_t; typedef __rvv_float16mf2x2_t vfloat16mf2x2_t; typedef __rvv_float16mf2x3_t vfloat16mf2x3_t; typedef __rvv_float16mf2x4_t vfloat16mf2x4_t; typedef __rvv_float16mf2x5_t vfloat16mf2x5_t; typedef __rvv_float16mf2x6_t vfloat16mf2x6_t; typedef __rvv_float16mf2x7_t vfloat16mf2x7_t; typedef __rvv_float16mf2x8_t vfloat16mf2x8_t; typedef __rvv_float16m1_t vfloat16m1_t; typedef __rvv_float16m1x2_t vfloat16m1x2_t; typedef __rvv_float16m1x3_t vfloat16m1x3_t; typedef __rvv_float16m1x4_t vfloat16m1x4_t; typedef __rvv_float16m1x5_t vfloat16m1x5_t; typedef __rvv_float16m1x6_t vfloat16m1x6_t; typedef __rvv_float16m1x7_t vfloat16m1x7_t; typedef __rvv_float16m1x8_t vfloat16m1x8_t; typedef __rvv_float16m2_t vfloat16m2_t; typedef __rvv_float16m2x2_t vfloat16m2x2_t; typedef __rvv_float16m2x3_t vfloat16m2x3_t; typedef __rvv_float16m2x4_t vfloat16m2x4_t; typedef __rvv_float16m4_t vfloat16m4_t; typedef __rvv_float16m4x2_t vfloat16m4x2_t; typedef __rvv_float16m8_t vfloat16m8_t; typedef __rvv_float32mf2_t vfloat32mf2_t; typedef __rvv_float32mf2x2_t vfloat32mf2x2_t; typedef 
__rvv_float32mf2x3_t vfloat32mf2x3_t; typedef __rvv_float32mf2x4_t vfloat32mf2x4_t; typedef __rvv_float32mf2x5_t vfloat32mf2x5_t; typedef __rvv_float32mf2x6_t vfloat32mf2x6_t; typedef __rvv_float32mf2x7_t vfloat32mf2x7_t; typedef __rvv_float32mf2x8_t vfloat32mf2x8_t; typedef __rvv_float32m1_t vfloat32m1_t; typedef __rvv_float32m1x2_t vfloat32m1x2_t; typedef __rvv_float32m1x3_t vfloat32m1x3_t; typedef __rvv_float32m1x4_t vfloat32m1x4_t; typedef __rvv_float32m1x5_t vfloat32m1x5_t; typedef __rvv_float32m1x6_t vfloat32m1x6_t; typedef __rvv_float32m1x7_t vfloat32m1x7_t; typedef __rvv_float32m1x8_t vfloat32m1x8_t; typedef __rvv_float32m2_t vfloat32m2_t; typedef __rvv_float32m2x2_t vfloat32m2x2_t; typedef __rvv_float32m2x3_t vfloat32m2x3_t; typedef __rvv_float32m2x4_t vfloat32m2x4_t; typedef __rvv_float32m4_t vfloat32m4_t; typedef __rvv_float32m4x2_t vfloat32m4x2_t; typedef __rvv_float32m8_t vfloat32m8_t; typedef __rvv_float64m1_t vfloat64m1_t; typedef __rvv_float64m1x2_t vfloat64m1x2_t; typedef __rvv_float64m1x3_t vfloat64m1x3_t; typedef __rvv_float64m1x4_t vfloat64m1x4_t; typedef __rvv_float64m1x5_t vfloat64m1x5_t; typedef __rvv_float64m1x6_t vfloat64m1x6_t; typedef __rvv_float64m1x7_t vfloat64m1x7_t; typedef __rvv_float64m1x8_t vfloat64m1x8_t; typedef __rvv_float64m2_t vfloat64m2_t; typedef __rvv_float64m2x2_t vfloat64m2x2_t; typedef __rvv_float64m2x3_t vfloat64m2x3_t; typedef __rvv_float64m2x4_t vfloat64m2x4_t; typedef __rvv_float64m4_t vfloat64m4_t; typedef __rvv_float64m4x2_t vfloat64m4x2_t; typedef __rvv_float64m8_t vfloat64m8_t; #define __riscv_v_intrinsic_overloading 1 #ifdef __cplusplus } #endif // __cplusplus #endif // __RISCV_VECTOR_H /*===---- rtmintrin.h - RTM intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __RTMINTRIN_H #define __RTMINTRIN_H #define _XBEGIN_STARTED (~0u) #define _XABORT_EXPLICIT (1 << 0) #define _XABORT_RETRY (1 << 1) #define _XABORT_CONFLICT (1 << 2) #define _XABORT_CAPACITY (1 << 3) #define _XABORT_DEBUG (1 << 4) #define _XABORT_NESTED (1 << 5) #define _XABORT_CODE(x) (((x) >> 24) & 0xFF) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm"))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _xbegin(void) { return (unsigned int)__builtin_ia32_xbegin(); } static __inline__ void __DEFAULT_FN_ATTRS _xend(void) { __builtin_ia32_xend(); } #define _xabort(imm) __builtin_ia32_xabort((imm)) #undef __DEFAULT_FN_ATTRS #endif /* __RTMINTRIN_H */ /*===---- s390intrin.h - SystemZ intrinsics --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __S390INTRIN_H #define __S390INTRIN_H #ifndef __s390__ #error " is for s390 only" #endif #ifdef __HTM__ #include #endif #ifdef __VEC__ #include #endif #endif /* __S390INTRIN_H*/ /*===--------------- serializeintrin.h - serialize intrinsics --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." 
#endif #ifndef __SERIALIZEINTRIN_H #define __SERIALIZEINTRIN_H /// Serialize instruction fetch and execution. /// /// \headerfile /// /// This intrinsic corresponds to the SERIALIZE instruction. /// static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("serialize"))) _serialize (void) { __builtin_ia32_serialize (); } #endif /* __SERIALIZEINTRIN_H */ /*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __SGXINTRIN_H #define __SGXINTRIN_H #if __has_extension(gnu_asm) /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sgx"))) static __inline unsigned int __DEFAULT_FN_ATTRS _enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("enclu" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } static __inline unsigned int __DEFAULT_FN_ATTRS _encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("encls" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } static __inline unsigned int __DEFAULT_FN_ATTRS _enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) { unsigned int __result; __asm__ ("enclv" : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2]) : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2]) : "cc"); return __result; } #undef __DEFAULT_FN_ATTRS #endif /* 
__has_extension(gnu_asm) */ #endif /*===--------------- sha512intrin.h - SHA512 intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __SHA512INTRIN_H #define __SHA512INTRIN_H #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("sha512"), \ __min_vector_width__(256))) /// This intrinisc is one of the two SHA512 message scheduling instructions. /// The intrinsic performs an intermediate calculation for the next four /// SHA512 message qwords. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512MSG1 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 128-bit vector of [2 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. 
/// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE s0(qword): /// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7) /// } /// W[4] := __B.qword[0] /// W[3] := __A.qword[3] /// W[2] := __A.qword[2] /// W[1] := __A.qword[1] /// W[0] := __A.qword[0] /// dst.qword[3] := W[3] + s0(W[4]) /// dst.qword[2] := W[2] + s0(W[3]) /// dst.qword[1] := W[1] + s0(W[2]) /// dst.qword[0] := W[0] + s0(W[1]) /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B); } /// This intrinisc is one of the two SHA512 message scheduling instructions. /// The intrinsic performs the final calculation for the next four SHA512 /// message qwords. The calculated results are stored in \a dst. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512MSG2 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 256-bit vector of [4 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. 
/// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE s1(qword) { /// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6) /// } /// W[14] := __B.qword[2] /// W[15] := __B.qword[3] /// W[16] := __A.qword[0] + s1(W[14]) /// W[17] := __A.qword[1] + s1(W[15]) /// W[18] := __A.qword[2] + s1(W[16]) /// W[19] := __A.qword[3] + s1(W[17]) /// dst.qword[3] := W[19] /// dst.qword[2] := W[18] /// dst.qword[1] := W[17] /// dst.qword[0] := W[16] /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B); } /// This intrinisc performs two rounds of SHA512 operation using initial SHA512 /// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from /// \a __A, and a pre-computed sum of the next two round message qwords and /// the corresponding round constants from \a __C (only the two lower qwords /// of the third operand). The updated SHA512 state (A,B,E,F) is written to /// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later /// rounds. /// /// \headerfile /// /// \code /// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSHA512RNDS2 instruction. /// /// \param __A /// A 256-bit vector of [4 x long long]. /// \param __B /// A 256-bit vector of [4 x long long]. /// \param __C /// A 128-bit vector of [2 x long long]. /// \returns /// A 256-bit vector of [4 x long long]. 
/// /// \code{.operation} /// DEFINE ROR64(qword, n) { /// count := n % 64 /// dest := (qword >> count) | (qword << (64 - count)) /// RETURN dest /// } /// DEFINE SHR64(qword, n) { /// RETURN qword >> n /// } /// DEFINE cap_sigma0(qword) { /// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39) /// } /// DEFINE cap_sigma1(qword) { /// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41) /// } /// DEFINE MAJ(a,b,c) { /// RETURN (a & b) ^ (a & c) ^ (b & c) /// } /// DEFINE CH(e,f,g) { /// RETURN (e & f) ^ (g & ~e) /// } /// A[0] := __B.qword[3] /// B[0] := __B.qword[2] /// C[0] := __C.qword[3] /// D[0] := __C.qword[2] /// E[0] := __B.qword[1] /// F[0] := __B.qword[0] /// G[0] := __C.qword[1] /// H[0] := __C.qword[0] /// WK[0]:= __A.qword[0] /// WK[1]:= __A.qword[1] /// FOR i := 0 to 1: /// A[i+1] := CH(E[i], F[i], G[i]) + /// cap_sigma1(E[i]) + WK[i] + H[i] + /// MAJ(A[i], B[i], C[i]) + /// cap_sigma0(A[i]) /// B[i+1] := A[i] /// C[i+1] := B[i] /// D[i+1] := C[i] /// E[i+1] := CH(E[i], F[i], G[i]) + /// cap_sigma1(E[i]) + WK[i] + H[i] + D[i] /// F[i+1] := E[i] /// G[i+1] := F[i] /// H[i+1] := G[i] /// ENDFOR /// dst.qword[3] := A[2] /// dst.qword[2] := B[2] /// dst.qword[1] := E[2] /// dst.qword[0] := F[2] /// dst[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) { return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B, (__v2du)__C); } #undef __DEFAULT_FN_ATTRS256 #endif // __SHA512INTRIN_H /*===---- shaintrin.h - SHA intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." 
#endif #ifndef __SHAINTRIN_H #define __SHAINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128))) /// Performs four iterations of the inner loop of the SHA-1 message digest /// algorithm using the starting SHA-1 state (A, B, C, D) from the 128-bit /// vector of [4 x i32] in \a V1 and the next four 32-bit elements of the /// message from the 128-bit vector of [4 x i32] in \a V2. Note that the /// SHA-1 state variable E must have already been added to \a V2 /// (\c _mm_sha1nexte_epu32() can perform this step). Returns the updated /// SHA-1 state (A, B, C, D) as a 128-bit vector of [4 x i32]. /// /// The SHA-1 algorithm has an inner loop of 80 iterations, twenty each /// with a different combining function and rounding constant. This /// intrinsic performs four iterations using a combining function and /// rounding constant selected by \a M[1:0]. /// /// \headerfile <immintrin.h> /// /// \code /// __m128i _mm_sha1rnds4_epu32(__m128i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the \c SHA1RNDS4 instruction. /// /// \param V1 /// A 128-bit vector of [4 x i32] containing the initial SHA-1 state. /// \param V2 /// A 128-bit vector of [4 x i32] containing the next four elements of /// the message, plus SHA-1 state variable E. /// \param M /// An immediate value where bits [1:0] select among four possible /// combining functions and rounding constants (not specified here). /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 state. #define _mm_sha1rnds4_epu32(V1, V2, M) \ ((__m128i)__builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), \ (__v4si)(__m128i)(V2), (M))) /// Calculates the SHA-1 state variable E from the SHA-1 state variables in /// the 128-bit vector of [4 x i32] in \a __X, adds that to the next set of /// four message elements in the 128-bit vector of [4 x i32] in \a __Y, and /// returns the result.
/// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the \c SHA1NEXTE instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing the current SHA-1 state. /// \param __Y /// A 128-bit vector of [4 x i32] containing the next four elements of the /// message. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y); } /// Performs an intermediate calculation for deriving the next four SHA-1 /// message elements using previous message elements from the 128-bit /// vectors of [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the \c SHA1MSG1 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing previous message elements. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message elements. /// \returns A 128-bit vector of [4 x i32] containing the derived SHA-1 /// elements. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1msg1_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y); } /// Performs the final calculation for deriving the next four SHA-1 message /// elements using previous message elements from the 128-bit vectors of /// [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the \c SHA1MSG2 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing an intermediate result. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message values. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 /// values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha1msg2_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y); } /// Performs two rounds of SHA-256 operation using the following inputs: a /// starting SHA-256 state (C, D, G, H) from the 128-bit vector of /// [4 x i32] in \a __X; a starting SHA-256 state (A, B, E, F) from the /// 128-bit vector of [4 x i32] in \a __Y; and a pre-computed sum of the /// next two message elements (unsigned 32-bit integers) and corresponding /// rounding constants from the 128-bit vector of [4 x i32] in \a __Z. /// Returns the updated SHA-256 state (A, B, E, F) as a 128-bit vector of /// [4 x i32]. /// /// The SHA-256 algorithm has a core loop of 64 iterations. This intrinsic /// performs two of those iterations. /// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the \c SHA256RNDS2 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing part of the initial SHA-256 /// state. /// \param __Y /// A 128-bit vector of [4 x i32] containing part of the initial SHA-256 /// state. /// \param __Z /// A 128-bit vector of [4 x i32] containing additional input to the /// SHA-256 operation. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-256 /// state. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z); } /// Performs an intermediate calculation for deriving the next four SHA-256 /// message elements using previous message elements from the 128-bit /// vectors of [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the \c SHA256MSG1 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing previous message elements. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message elements.
/// \returns A 128-bit vector of [4 x i32] containing the updated SHA-256 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256msg1_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y); } /// Performs the final calculation for deriving the next four SHA-256 message /// elements using previous message elements from the 128-bit vectors of /// [4 x i32] in \a __X and \a __Y, and returns the result. /// /// \headerfile <immintrin.h> /// /// This intrinsic corresponds to the \c SHA256MSG2 instruction. /// /// \param __X /// A 128-bit vector of [4 x i32] containing an intermediate result. /// \param __Y /// A 128-bit vector of [4 x i32] containing previous message values. /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-256 /// values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha256msg2_epu32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y); } #undef __DEFAULT_FN_ATTRS #endif /* __SHAINTRIN_H */ //===----- sifive_vector.h - SiFive Vector definitions --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _SIFIVE_VECTOR_H_ #define _SIFIVE_VECTOR_H_ #include "riscv_vector.h" #pragma clang riscv intrinsic sifive_vector #endif //_SIFIVE_VECTOR_H_ /*===-------------------- sm3intrin.h - SM3 intrinsics ---------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use <sm3intrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H #ifndef __SM3INTRIN_H #define __SM3INTRIN_H #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("sm3"), \ __min_vector_width__(128))) /// This intrinsic is one of the two SM3 message scheduling intrinsics. The /// intrinsic performs an initial calculation for the next four SM3 message /// words. The calculated results are stored in \a dst. /// /// \headerfile <immintrin.h> /// /// \code /// __m128i _mm_sm3msg1_epi32(__m128i __A, __m128i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSM3MSG1 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \param __C /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32 - count)) /// RETURN dest /// } /// DEFINE P1(x) { /// RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23) /// } /// W[0] := __C.dword[0] /// W[1] := __C.dword[1] /// W[2] := __C.dword[2] /// W[3] := __C.dword[3] /// W[7] := __A.dword[0] /// W[8] := __A.dword[1] /// W[9] := __A.dword[2] /// W[10] := __A.dword[3] /// W[13] := __B.dword[0] /// W[14] := __B.dword[1] /// W[15] := __B.dword[2] /// TMP0 := W[7] ^ W[0] ^ ROL32(W[13], 15) /// TMP1 := W[8] ^ W[1] ^ ROL32(W[14], 15) /// TMP2 := W[9] ^ W[2] ^ ROL32(W[15], 15) /// TMP3 := W[10] ^ W[3] /// dst.dword[0] := P1(TMP0) /// dst.dword[1] := P1(TMP1) /// dst.dword[2] := P1(TMP2) /// dst.dword[3] := P1(TMP3) /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg1_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vsm3msg1((__v4su)__A, (__v4su)__B, (__v4su)__C); } /// This intrinsic is one of the two SM3 message scheduling intrinsics. The /// intrinsic performs the final calculation for the next four SM3 message /// words. The calculated results are stored in \a dst.
/// /// \headerfile <immintrin.h> /// /// \code /// __m128i _mm_sm3msg2_epi32(__m128i __A, __m128i __B, __m128i __C) /// \endcode /// /// This intrinsic corresponds to the \c VSM3MSG2 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \param __C /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// WTMP[0] := __A.dword[0] /// WTMP[1] := __A.dword[1] /// WTMP[2] := __A.dword[2] /// WTMP[3] := __A.dword[3] /// W[3] := __B.dword[0] /// W[4] := __B.dword[1] /// W[5] := __B.dword[2] /// W[6] := __B.dword[3] /// W[10] := __C.dword[0] /// W[11] := __C.dword[1] /// W[12] := __C.dword[2] /// W[13] := __C.dword[3] /// W[16] := ROL32(W[3], 7) ^ W[10] ^ WTMP[0] /// W[17] := ROL32(W[4], 7) ^ W[11] ^ WTMP[1] /// W[18] := ROL32(W[5], 7) ^ W[12] ^ WTMP[2] /// W[19] := ROL32(W[6], 7) ^ W[13] ^ WTMP[3] /// W[19] := W[19] ^ ROL32(W[16], 6) ^ ROL32(W[16], 15) ^ ROL32(W[16], 30) /// dst.dword[0] := W[16] /// dst.dword[1] := W[17] /// dst.dword[2] := W[18] /// dst.dword[3] := W[19] /// dst[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg2_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vsm3msg2((__v4su)__A, (__v4su)__B, (__v4su)__C); } /// This intrinsic performs two rounds of SM3 operation using initial SM3 state /// (C, D, G, H) from \a __A, an initial SM3 state (A, B, E, F) /// from \a __B and pre-computed words from \a __C. \a __A with /// initial SM3 state of (C, D, G, H) assumes input of non-rotated left /// variables from previous state. The updated SM3 state (A, B, E, F) is /// written to \a __A. The \a imm8 should contain the even round number /// for the first of the two rounds computed by this instruction.
The /// computation masks the \a imm8 value by AND’ing it with 0x3E so that only /// even round numbers from 0 through 62 are used for this operation. The /// calculated results are stored in \a dst. /// /// \headerfile <immintrin.h> /// /// \code /// __m128i _mm_sm3rnds2_epi32(__m128i __A, __m128i __B, __m128i __C, /// const int imm8) /// \endcode /// /// This intrinsic corresponds to the \c VSM3RNDS2 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \param __C /// A 128-bit vector of [4 x int]. /// \param imm8 /// An 8-bit constant integer. /// \returns /// A 128-bit vector of [4 x int]. /// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE P0(dword) { /// RETURN dword ^ ROL32(dword, 9) ^ ROL32(dword, 17) /// } /// DEFINE FF(x,y,z, round){ /// IF round < 16 /// RETURN (x ^ y ^ z) /// ELSE /// RETURN (x & y) | (x & z) | (y & z) /// FI /// } /// DEFINE GG(x, y, z, round){ /// IF round < 16 /// RETURN (x ^ y ^ z) /// ELSE /// RETURN (x & y) | (~x & z) /// FI /// } /// A[0] := __B.dword[3] /// B[0] := __B.dword[2] /// C[0] := __A.dword[3] /// D[0] := __A.dword[2] /// E[0] := __B.dword[1] /// F[0] := __B.dword[0] /// G[0] := __A.dword[1] /// H[0] := __A.dword[0] /// W[0] := __C.dword[0] /// W[1] := __C.dword[1] /// W[4] := __C.dword[2] /// W[5] := __C.dword[3] /// C[0] := ROL32(C[0], 9) /// D[0] := ROL32(D[0], 9) /// G[0] := ROL32(G[0], 19) /// H[0] := ROL32(H[0], 19) /// ROUND := imm8 & 0x3E /// IF ROUND < 16 /// CONST := 0x79CC4519 /// ELSE /// CONST := 0x7A879D8A /// FI /// CONST := ROL32(CONST,ROUND) /// FOR i:= 0 to 1 /// S1 := ROL32((ROL32(A[i], 12) + E[i] + CONST), 7) /// S2 := S1 ^ ROL32(A[i], 12) /// T1 := FF(A[i], B[i], C[i], ROUND) + D[i] + S2 + (W[i] ^ W[i+4]) /// T2 := GG(E[i], F[i], G[i], ROUND) + H[i] + S1 + W[i] /// D[i+1] := C[i] /// C[i+1] := ROL32(B[i],9) /// B[i+1] := A[i] /// A[i+1] := T1 ///
H[i+1] := G[i] /// G[i+1] := ROL32(F[i], 19) /// F[i+1] := E[i] /// E[i+1] := P0(T2) /// CONST := ROL32(CONST, 1) /// ENDFOR /// dst.dword[3] := A[2] /// dst.dword[2] := B[2] /// dst.dword[1] := E[2] /// dst.dword[0] := F[2] /// dst[MAX:128] := 0 /// \endcode #define _mm_sm3rnds2_epi32(A, B, C, D) \ (__m128i) __builtin_ia32_vsm3rnds2((__v4su)A, (__v4su)B, (__v4su)C, (int)D) #undef __DEFAULT_FN_ATTRS128 #endif // __SM3INTRIN_H /*===--------------- sm4intrin.h - SM4 intrinsics -----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __SM4INTRIN_H #define __SM4INTRIN_H /// This intrinsic performs four rounds of SM4 key expansion. The intrinsic /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile /// /// \code /// __m128i _mm_sm4key4_epi32(__m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4KEY4 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int]. 
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE SBOX_BYTE(dword, i) { /// RETURN sbox[dword.byte[i]] /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_KEY(dword) { /// RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) /// } /// DEFINE T_KEY(dword) { /// RETURN L_KEY(lower_t(dword)) /// } /// DEFINE F_KEY(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 0 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:128] := 0 /// \endcode /* Arguments and the whole expansion are parenthesized so that expression arguments are cast as a unit. */ #define _mm_sm4key4_epi32(A, B) \ ((__m128i)__builtin_ia32_vsm4key4128((__v4su)(A), (__v4su)(B))) /// This intrinsic performs four rounds of SM4 key expansion. The intrinsic /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile <immintrin.h> /// /// \code /// __m256i _mm256_sm4key4_epi32(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4KEY4 instruction. /// /// \param __A /// A 256-bit vector of [8 x int]. /// \param __B /// A 256-bit vector of [8 x int]. /// \returns /// A 256-bit vector of [8 x int].
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE SBOX_BYTE(dword, i) { /// RETURN sbox[dword.byte[i]] /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_KEY(dword) { /// RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) /// } /// DEFINE T_KEY(dword) { /// RETURN L_KEY(lower_t(dword)) /// } /// DEFINE F_KEY(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 1 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:256] := 0 /// \endcode /* Arguments and the whole expansion are parenthesized so that expression arguments are cast as a unit. */ #define _mm256_sm4key4_epi32(A, B) \ ((__m256i)__builtin_ia32_vsm4key4256((__v8su)(A), (__v8su)(B))) /// This intrinsic performs four rounds of SM4 encryption. The intrinsic /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile <immintrin.h> /// /// \code /// __m128i _mm_sm4rnds4_epi32(__m128i __A, __m128i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4RNDS4 instruction. /// /// \param __A /// A 128-bit vector of [4 x int]. /// \param __B /// A 128-bit vector of [4 x int]. /// \returns /// A 128-bit vector of [4 x int].
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE SBOX_BYTE(dword, i) { /// RETURN sbox[dword.byte[i]] /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_RND(dword) { /// tmp := dword /// tmp := tmp ^ ROL32(dword, 2) /// tmp := tmp ^ ROL32(dword, 10) /// tmp := tmp ^ ROL32(dword, 18) /// tmp := tmp ^ ROL32(dword, 24) /// RETURN tmp /// } /// DEFINE T_RND(dword) { /// RETURN L_RND(lower_t(dword)) /// } /// DEFINE F_RND(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 0 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:128] := 0 /// \endcode /* Arguments and the whole expansion are parenthesized so that expression arguments are cast as a unit. */ #define _mm_sm4rnds4_epi32(A, B) \ ((__m128i)__builtin_ia32_vsm4rnds4128((__v4su)(A), (__v4su)(B))) /// This intrinsic performs four rounds of SM4 encryption. The intrinsic /// operates on independent 128-bit lanes. The calculated results are /// stored in \a dst. /// \headerfile <immintrin.h> /// /// \code /// __m256i _mm256_sm4rnds4_epi32(__m256i __A, __m256i __B) /// \endcode /// /// This intrinsic corresponds to the \c VSM4RNDS4 instruction. /// /// \param __A /// A 256-bit vector of [8 x int]. /// \param __B /// A 256-bit vector of [8 x int]. /// \returns /// A 256-bit vector of [8 x int].
/// /// \code{.operation} /// DEFINE ROL32(dword, n) { /// count := n % 32 /// dest := (dword << count) | (dword >> (32-count)) /// RETURN dest /// } /// DEFINE SBOX_BYTE(dword, i) { /// RETURN sbox[dword.byte[i]] /// } /// DEFINE lower_t(dword) { /// tmp.byte[0] := SBOX_BYTE(dword, 0) /// tmp.byte[1] := SBOX_BYTE(dword, 1) /// tmp.byte[2] := SBOX_BYTE(dword, 2) /// tmp.byte[3] := SBOX_BYTE(dword, 3) /// RETURN tmp /// } /// DEFINE L_RND(dword) { /// tmp := dword /// tmp := tmp ^ ROL32(dword, 2) /// tmp := tmp ^ ROL32(dword, 10) /// tmp := tmp ^ ROL32(dword, 18) /// tmp := tmp ^ ROL32(dword, 24) /// RETURN tmp /// } /// DEFINE T_RND(dword) { /// RETURN L_RND(lower_t(dword)) /// } /// DEFINE F_RND(X0, X1, X2, X3, round_key) { /// RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) /// } /// FOR i:= 0 to 1 /// P[0] := __B.xmm[i].dword[0] /// P[1] := __B.xmm[i].dword[1] /// P[2] := __B.xmm[i].dword[2] /// P[3] := __B.xmm[i].dword[3] /// C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) /// C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) /// C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) /// C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) /// DEST.xmm[i].dword[0] := C[0] /// DEST.xmm[i].dword[1] := C[1] /// DEST.xmm[i].dword[2] := C[2] /// DEST.xmm[i].dword[3] := C[3] /// ENDFOR /// DEST[MAX:256] := 0 /// \endcode /* Arguments and the whole expansion are parenthesized so that expression arguments are cast as a unit. */ #define _mm256_sm4rnds4_epi32(A, B) \ ((__m256i)__builtin_ia32_vsm4rnds4256((__v8su)(A), (__v8su)(B))) #endif // __SM4INTRIN_H /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __SMMINTRIN_H #define __SMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \ __min_vector_width__(128))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 #define _MM_FROUND_TO_POS_INF 0x02 #define _MM_FROUND_TO_ZERO 0x03 #define _MM_FROUND_CUR_DIRECTION 0x04 #define _MM_FROUND_RAISE_EXC 0x00 #define _MM_FROUND_NO_EXC 0x08 #define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) #define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) #define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) #define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) #define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) #define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) /// Rounds up each element of the 128-bit vector of [4 x float] to an /// integer and returns the rounded values in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_ceil_ps(__m128 X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction. /// /// \param X /// A 128-bit vector of [4 x float] values to be rounded up. /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) /// Rounds up each element of the 128-bit vector of [2 x double] to an /// integer and returns the rounded values in a 128-bit vector of /// [2 x double]. 
/// /// \headerfile /// /// \code /// __m128d _mm_ceil_pd(__m128d X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction. /// It is implemented as \c _mm_round_pd(X, _MM_FROUND_CEIL). /// /// \param X /// A 128-bit vector of [2 x double] values to be rounded up. /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds up the lowest element of the second 128-bit vector /// operand to an integer and copies it to the lowest element of the 128-bit /// result vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_ceil_ss(__m128 X, __m128 Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSS / ROUNDSS instruction. /// It is implemented as \c _mm_round_ss(X, Y, _MM_FROUND_CEIL). /// /// \param X /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is /// rounded up to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. #define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds up the lower element of the second 128-bit vector operand to an /// integer and copies it to the lower element of the 128-bit result vector /// of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_ceil_sd(__m128d X, __m128d Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSD / ROUNDSD instruction. /// It is implemented as \c _mm_round_sd(X, Y, _MM_FROUND_CEIL). /// /// \param X /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is /// copied to the corresponding bits of the result.
/// \param Y /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is /// rounded up to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. #define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) /// Rounds down each element of the 128-bit vector of [4 x float] to an /// integer and returns the rounded values in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_floor_ps(__m128 X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction. /// It is implemented as \c _mm_round_ps(X, _MM_FROUND_FLOOR). /// /// \param X /// A 128-bit vector of [4 x float] values to be rounded down. /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) /// Rounds down each element of the 128-bit vector of [2 x double] to an /// integer and returns the rounded values in a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_floor_pd(__m128d X); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction. /// It is implemented as \c _mm_round_pd(X, _MM_FROUND_FLOOR). /// /// \param X /// A 128-bit vector of [2 x double] values to be rounded down. /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds down the lowest element of the second 128-bit vector /// operand to an integer and copies it to the lowest element of the 128-bit /// result vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_floor_ss(__m128 X, __m128 Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSS / ROUNDSS instruction. /// /// \param X /// A 128-bit vector of [4 x float].
The values stored in bits [127:32] are /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is /// rounded down to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values (the result of \c _mm_round_ss(X, Y, _MM_FROUND_FLOOR)). #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double]. /// Rounds down the lower element of the second 128-bit vector operand to an /// integer and copies it to the lower element of the 128-bit result vector /// of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_floor_sd(__m128d X, __m128d Y); /// \endcode /// /// This intrinsic corresponds to the VROUNDSD / ROUNDSD instruction. /// It is implemented as \c _mm_round_sd(X, Y, _MM_FROUND_FLOOR). /// /// \param X /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is /// rounded down to the nearest integer and copied to the corresponding bits /// of the result. /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) /// Rounds each element of the 128-bit vector of [4 x float] to an /// integer value according to the rounding control specified by the second /// argument and returns the rounded values in a 128-bit vector of /// [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_round_ps(__m128 X, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. /// \param M /// An integer value that specifies the rounding operation. It can be /// composed from the \c _MM_FROUND_* macros. \n /// Bits [7:4] are reserved.
\n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used (\c _MM_FROUND_RAISE_EXC) \n /// 1: The PE field is not updated (\c _MM_FROUND_NO_EXC) \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting (\c _MM_FROUND_CUR_DIRECTION) \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest (\c _MM_FROUND_TO_NEAREST_INT) \n /// 01: Downward (toward negative infinity) (\c _MM_FROUND_TO_NEG_INF) \n /// 10: Upward (toward positive infinity) (\c _MM_FROUND_TO_POS_INF) \n /// 11: Truncated (\c _MM_FROUND_TO_ZERO) /// \returns A 128-bit vector of [4 x float] containing the rounded values. #define _mm_round_ps(X, M) \ ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))) /// Copies three upper elements of the first 128-bit vector operand to /// the corresponding three upper elements of the 128-bit result vector of /// [4 x float]. Rounds the lowest element of the second 128-bit vector /// operand to an integer value according to the rounding control specified /// by the third argument and copies it to the lowest element of the 128-bit /// result vector of [4 x float]. /// /// \headerfile /// /// \code /// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDSS / ROUNDSS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is /// rounded to an integer value using the specified rounding control and /// copied to the corresponding bits of the result. /// \param M /// An integer value that specifies the rounding operation. It can be /// composed from the \c _MM_FROUND_* macros. \n /// Bits [7:4] are reserved.
\n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used (\c _MM_FROUND_RAISE_EXC) \n /// 1: The PE field is not updated (\c _MM_FROUND_NO_EXC) \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting (\c _MM_FROUND_CUR_DIRECTION) \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest (\c _MM_FROUND_TO_NEAREST_INT) \n /// 01: Downward (toward negative infinity) (\c _MM_FROUND_TO_NEG_INF) \n /// 10: Upward (toward positive infinity) (\c _MM_FROUND_TO_POS_INF) \n /// 11: Truncated (\c _MM_FROUND_TO_ZERO) /// \returns A 128-bit vector of [4 x float] containing the copied and rounded /// values. #define _mm_round_ss(X, Y, M) \ ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ (M))) /// Rounds each element of the 128-bit vector of [2 x double] to an /// integer value according to the rounding control specified by the second /// argument and returns the rounded values in a 128-bit vector of /// [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_round_pd(__m128d X, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. /// \param M /// An integer value that specifies the rounding operation. It can be /// composed from the \c _MM_FROUND_* macros. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used (\c _MM_FROUND_RAISE_EXC) \n /// 1: The PE field is not updated (\c _MM_FROUND_NO_EXC) \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting (\c _MM_FROUND_CUR_DIRECTION) \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest (\c _MM_FROUND_TO_NEAREST_INT) \n /// 01: Downward (toward negative infinity) (\c _MM_FROUND_TO_NEG_INF) \n /// 10: Upward (toward positive infinity) (\c _MM_FROUND_TO_POS_INF) \n /// 11: Truncated (\c _MM_FROUND_TO_ZERO) /// \returns A 128-bit vector of [2 x double] containing the rounded values. #define _mm_round_pd(X, M) \ ((__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))) /// Copies the upper element of the first 128-bit vector operand to the /// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds the lower element of the second 128-bit vector operand to an /// integer value according to the rounding control specified by the third /// argument and copies it to the lower element of the 128-bit result vector /// of [2 x double]. /// /// \headerfile /// /// \code /// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VROUNDSD / ROUNDSD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is /// copied to the corresponding bits of the result. /// \param Y /// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is /// rounded to the nearest integer using the specified rounding control and /// copied to the corresponding bits of the result. /// \param M /// An integer value that specifies the rounding operation. \n /// Bits [7:4] are reserved. \n /// Bit [3] is a precision exception value: \n /// 0: A normal PE exception is used \n /// 1: The PE field is not updated \n /// Bit [2] is the rounding control source: \n /// 0: Use bits [1:0] of \a M \n /// 1: Use the current MXCSR setting \n /// Bits [1:0] contain the rounding control definition: \n /// 00: Nearest \n /// 01: Downward (toward negative infinity) \n /// 10: Upward (toward positive infinity) \n /// 11: Truncated /// \returns A 128-bit vector of [2 x double] containing the copied and rounded /// values. #define _mm_round_sd(X, Y, M) \ ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \ (M))) /* SSE4 Packed Blending Intrinsics. */ /// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// /// \headerfile /// /// \code /// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPD / BLENDPD instruction. /// /// \param V1 /// A 128-bit vector of [2 x double]. 
/// \param V2 /// A 128-bit vector of [2 x double]. /// \param M /// An immediate integer operand, with mask bits [1:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 64-bit /// element in operand \a V1 is copied to the same position in the result. /// When a mask bit is 1, the corresponding 64-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_blend_pd(V1, V2, M) \ ((__m128d)__builtin_ia32_blendpd((__v2df)(__m128d)(V1), \ (__v2df)(__m128d)(V2), (int)(M))) /// Returns a 128-bit vector of [4 x float] where the values are selected /// from either the first or second operand as specified by the third /// operand, the control mask. /// /// \headerfile /// /// \code /// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VBLENDPS / BLENDPS instruction. /// /// \param V1 /// A 128-bit vector of [4 x float]. /// \param V2 /// A 128-bit vector of [4 x float]. /// \param M /// An immediate integer operand, with mask bits [3:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. When a mask bit is 0, the corresponding 32-bit /// element in operand \a V1 is copied to the same position in the result. /// When a mask bit is 1, the corresponding 32-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_blend_ps(V1, V2, M) \ ((__m128)__builtin_ia32_blendps((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ (int)(M))) /// Returns a 128-bit vector of [2 x double] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. 
/// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPD / BLENDVPD instruction. /// /// \param __V1 /// A 128-bit vector of [2 x double]. /// \param __V2 /// A 128-bit vector of [2 x double]. /// \param __M /// A 128-bit vector operand, with mask bits 127 and 63 specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// most significant bit of a copied value. When a mask bit is 0, the /// corresponding 64-bit element in operand \a __V1 is copied to the same /// position in the result. When a mask bit is 1, the corresponding 64-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [2 x double] containing the copied values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) { return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2, (__v2df)__M); } /// Returns a 128-bit vector of [4 x float] where the values are /// selected from either the first or second operand as specified by the /// third operand, the control mask. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDVPS / BLENDVPS instruction. /// /// \param __V1 /// A 128-bit vector of [4 x float]. /// \param __V2 /// A 128-bit vector of [4 x float]. /// \param __M /// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying /// how the values are to be copied. The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 32-bit element in operand \a __V1 is copied to the same /// position in the result. When a mask bit is 1, the corresponding 32-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [4 x float] containing the copied values. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) { return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2, (__v4sf)__M); } /// Returns a 128-bit vector of [16 x i8] where the values are selected /// from either of the first or second operand as specified by the third /// operand, the control mask. /// /// \headerfile /// /// This intrinsic corresponds to the VPBLENDVB / PBLENDVB instruction. /// /// \param __V1 /// A 128-bit vector of [16 x i8]. /// \param __V2 /// A 128-bit vector of [16 x i8]. /// \param __M /// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying /// how the values are to be copied. The position of the mask bit corresponds /// to the most significant bit of a copied value. When a mask bit is 0, the /// corresponding 8-bit element in operand \a __V1 is copied to the same /// position in the result. When a mask bit is 1, the corresponding 8-bit /// element in operand \a __V2 is copied to the same position in the result. /// \returns A 128-bit vector of [16 x i8] containing the copied values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1, __m128i __V2, __m128i __M) { return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__V1, (__v16qi)__V2, (__v16qi)__M); } /// Returns a 128-bit vector of [8 x i16] where the values are selected /// from either of the first or second operand as specified by the third /// operand, the control mask. /// /// \headerfile /// /// \code /// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M); /// \endcode /// /// This intrinsic corresponds to the VPBLENDW / PBLENDW instruction. /// /// \param V1 /// A 128-bit vector of [8 x i16]. /// \param V2 /// A 128-bit vector of [8 x i16]. /// \param M /// An immediate integer operand, with mask bits [7:0] specifying how the /// values are to be copied. The position of the mask bit corresponds to the /// index of a copied value. 
When a mask bit is 0, the corresponding 16-bit /// element in operand \a V1 is copied to the same position in the result. /// When a mask bit is 1, the corresponding 16-bit element in operand \a V2 /// is copied to the same position in the result. /// \returns A 128-bit vector of [8 x i16] containing the copied values. #define _mm_blend_epi16(V1, V2, M) \ ((__m128i)__builtin_ia32_pblendw128((__v8hi)(__m128i)(V1), \ (__v8hi)(__m128i)(V2), (int)(M))) /* SSE4 Dword Multiply Instructions. */ /// Multiples corresponding elements of two 128-bit vectors of [4 x i32] /// and returns the lower 32 bits of the each product in a 128-bit vector of /// [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULLD / PMULLD instruction. /// /// \param __V1 /// A 128-bit integer vector. /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2) { return (__m128i)((__v4su)__V1 * (__v4su)__V2); } /// Multiplies corresponding even-indexed elements of two 128-bit /// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64] /// containing the products. /// /// \headerfile /// /// This intrinsic corresponds to the VPMULDQ / PMULDQ instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [2 x i64] containing the products of both /// operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2); } /* SSE4 Floating Point Dot Product Instructions. */ /// Computes the dot product of the two 128-bit vectors of [4 x float] /// and returns it in the elements of the 128-bit result vector of /// [4 x float]. 
/// /// The immediate integer operand controls which input elements /// will contribute to the dot product, and where the final results are /// returned. /// /// \headerfile /// /// \code /// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VDPPS / DPPS instruction. /// /// \param X /// A 128-bit vector of [4 x float]. /// \param Y /// A 128-bit vector of [4 x float]. /// \param M /// An immediate integer operand. Mask bits [7:4] determine which elements /// of the input vectors are used, with bit [4] corresponding to the lowest /// element and bit [7] corresponding to the highest element of each [4 x /// float] vector. If a bit is set, the corresponding elements from the two /// input vectors are used as an input for dot product; otherwise that input /// is treated as zero. Bits [3:0] determine which elements of the result /// will receive a copy of the final dot product, with bit [0] corresponding /// to the lowest element and bit [3] corresponding to the highest element of /// each [4 x float] subvector. If a bit is set, the dot product is returned /// in the corresponding element; otherwise that element is set to zero. /// \returns A 128-bit vector of [4 x float] containing the dot product. #define _mm_dp_ps(X, Y, M) \ ((__m128)__builtin_ia32_dpps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (M))) /// Computes the dot product of the two 128-bit vectors of [2 x double] /// and returns it in the elements of the 128-bit result vector of /// [2 x double]. /// /// The immediate integer operand controls which input /// elements will contribute to the dot product, and where the final results /// are returned. /// /// \headerfile /// /// \code /// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VDPPD / DPPD instruction. /// /// \param X /// A 128-bit vector of [2 x double]. /// \param Y /// A 128-bit vector of [2 x double]. 
/// \param M /// An immediate integer operand. Mask bits [5:4] determine which elements /// of the input vectors are used, with bit [4] corresponding to the lowest /// element and bit [5] corresponding to the highest element of each of [2 x /// double] vector. If a bit is set, the corresponding elements from the two /// input vectors are used as an input for dot product; otherwise that input /// is treated as zero. Bits [1:0] determine which elements of the result /// will receive a copy of the final dot product, with bit [0] corresponding /// to the lowest element and bit [1] corresponding to the highest element of /// each [2 x double] vector. If a bit is set, the dot product is returned in /// the corresponding element; otherwise that element is set to zero. #define _mm_dp_pd(X, Y, M) \ ((__m128d)__builtin_ia32_dppd((__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \ (M))) /* SSE4 Streaming Load Hint Instruction. */ /// Loads integer values from a 128-bit aligned memory location to a /// 128-bit integer vector. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTDQA / MOVNTDQA instruction. /// /// \param __V /// A pointer to a 128-bit aligned memory location that contains the integer /// values. /// \returns A 128-bit integer vector containing the data stored at the /// specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_stream_load_si128(__m128i const *__V) { return (__m128i)__builtin_nontemporal_load((const __v2di *)__V); } /* SSE4 Packed Integer Min/Max Instructions. */ /// Compares the corresponding elements of two 128-bit vectors of /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser /// of the two values. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINSB / PMINSB instruction. /// /// \param __V1 /// A 128-bit vector of [16 x i8]. /// \param __V2 /// A 128-bit vector of [16 x i8] /// \returns A 128-bit vector of [16 x i8] containing the lesser values. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXSB / PMAXSB instruction. /// /// \param __V1 /// A 128-bit vector of [16 x i8]. /// \param __V2 /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser /// value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINUW / PMINUW instruction. /// /// \param __V1 /// A 128-bit vector of [8 x u16]. /// \param __V2 /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit vector of [8 x u16] containing the lesser values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXUW / PMAXUW instruction. /// /// \param __V1 /// A 128-bit vector of [8 x u16]. /// \param __V2 /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit vector of [8 x u16] containing the greater values. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser /// value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINSD / PMINSD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the lesser values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXSD / PMAXSD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. /// \param __V2 /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser /// value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMINUD / PMINUD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x u32]. /// \param __V2 /// A 128-bit vector of [4 x u32]. /// \returns A 128-bit vector of [4 x u32] containing the lesser values. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2); } /// Compares the corresponding elements of two 128-bit vectors of /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the /// greater value of the two. /// /// \headerfile /// /// This intrinsic corresponds to the VPMAXUD / PMAXUD instruction. /// /// \param __V1 /// A 128-bit vector of [4 x u32]. /// \param __V2 /// A 128-bit vector of [4 x u32]. /// \returns A 128-bit vector of [4 x u32] containing the greater values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2); } /* SSE4 Insertion and Extraction from XMM Register Instructions. */ /// Takes the first argument \a X and inserts an element from the second /// argument \a Y as selected by the third argument \a N. That result then /// has elements zeroed out also as selected by the third argument \a N. The /// resulting 128-bit vector of [4 x float] is then returned. /// /// \headerfile /// /// \code /// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N); /// \endcode /// /// This intrinsic corresponds to the VINSERTPS instruction. /// /// \param X /// A 128-bit vector source operand of [4 x float]. With the exception of /// those bits in the result copied from parameter \a Y and zeroed by bits /// [3:0] of \a N, all bits from this parameter are copied to the result. /// \param Y /// A 128-bit vector source operand of [4 x float]. One single-precision /// floating-point element from this source, as determined by the immediate /// parameter, is copied to the result. /// \param N /// Specifies which bits from operand \a Y will be copied, which bits in the /// result they will be copied to, and which bits in the result will be /// cleared. 
The following assignments are made: \n /// Bits [7:6] specify the bits to copy from operand \a Y: \n /// 00: Selects bits [31:0] from operand \a Y. \n /// 01: Selects bits [63:32] from operand \a Y. \n /// 10: Selects bits [95:64] from operand \a Y. \n /// 11: Selects bits [127:96] from operand \a Y. \n /// Bits [5:4] specify the bits in the result to which the selected bits /// from operand \a Y are copied: \n /// 00: Copies the selected bits from \a Y to result bits [31:0]. \n /// 01: Copies the selected bits from \a Y to result bits [63:32]. \n /// 10: Copies the selected bits from \a Y to result bits [95:64]. \n /// 11: Copies the selected bits from \a Y to result bits [127:96]. \n /// Bits[3:0]: If any of these bits are set, the corresponding result /// element is cleared. /// \returns A 128-bit vector of [4 x float] containing the copied /// single-precision floating point elements from the operands. #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) /// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and /// returns it, using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// int _mm_extract_ps(__m128 X, const int N); /// \endcode /// /// This intrinsic corresponds to the VEXTRACTPS / EXTRACTPS /// instruction. /// /// \param X /// A 128-bit vector of [4 x float]. /// \param N /// An immediate value. Bits [1:0] determines which bits from the argument /// \a X are extracted and returned: \n /// 00: Bits [31:0] of parameter \a X are returned. \n /// 01: Bits [63:32] of parameter \a X are returned. \n /// 10: Bits [95:64] of parameter \a X are returned. \n /// 11: Bits [127:96] of parameter \a X are returned. /// \returns A 32-bit integer containing the extracted 32 bits of float data. #define _mm_extract_ps(X, N) \ __builtin_bit_cast( \ int, __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N))) /* Miscellaneous insert and extract macros. 
*/ /* Extract a single-precision float from X at index N into D. */ #define _MM_EXTRACT_FLOAT(D, X, N) \ do { \ (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \ } while (0) /* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create an index suitable for _mm_insert_ps. */ #define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) /* Extract a float from X at index N into the first index of the return. */ #define _MM_PICK_OUT_PS(X, N) \ _mm_insert_ps(_mm_setzero_ps(), (X), _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) /* Insert int into packed integer array at index. */ /// Constructs a 128-bit vector of [16 x i8] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the lower 8 bits /// of an integer parameter \a I into an offset specified by the immediate /// value parameter \a N. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi8(__m128i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VPINSRB / PINSRB instruction. /// /// \param X /// A 128-bit integer vector of [16 x i8]. This vector is copied to the /// result and then one of the sixteen elements in the result vector is /// replaced by the lower 8 bits of \a I. /// \param I /// An integer. The lower 8 bits of this operand are written to the result /// beginning at the offset specified by \a N. /// \param N /// An immediate value. Bits [3:0] specify the bit offset in the result at /// which the lower 8 bits of \a I are written. \n /// 0000: Bits [7:0] of the result are used for insertion. \n /// 0001: Bits [15:8] of the result are used for insertion. \n /// 0010: Bits [23:16] of the result are used for insertion. \n /// 0011: Bits [31:24] of the result are used for insertion. \n /// 0100: Bits [39:32] of the result are used for insertion. \n /// 0101: Bits [47:40] of the result are used for insertion. \n /// 0110: Bits [55:48] of the result are used for insertion. 
\n /// 0111: Bits [63:56] of the result are used for insertion. \n /// 1000: Bits [71:64] of the result are used for insertion. \n /// 1001: Bits [79:72] of the result are used for insertion. \n /// 1010: Bits [87:80] of the result are used for insertion. \n /// 1011: Bits [95:88] of the result are used for insertion. \n /// 1100: Bits [103:96] of the result are used for insertion. \n /// 1101: Bits [111:104] of the result are used for insertion. \n /// 1110: Bits [119:112] of the result are used for insertion. \n /// 1111: Bits [127:120] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi8(X, I, N) \ ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), (int)(I), \ (int)(N))) /// Constructs a 128-bit vector of [4 x i32] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 32-bit /// integer parameter \a I at the offset specified by the immediate value /// parameter \a N. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi32(__m128i X, int I, const int N); /// \endcode /// /// This intrinsic corresponds to the VPINSRD / PINSRD instruction. /// /// \param X /// A 128-bit integer vector of [4 x i32]. This vector is copied to the /// result and then one of the four elements in the result vector is /// replaced by \a I. /// \param I /// A 32-bit integer that is written to the result beginning at the offset /// specified by \a N. /// \param N /// An immediate value. Bits [1:0] specify the bit offset in the result at /// which the integer \a I is written. \n /// 00: Bits [31:0] of the result are used for insertion. \n /// 01: Bits [63:32] of the result are used for insertion. \n /// 10: Bits [95:64] of the result are used for insertion. \n /// 11: Bits [127:96] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. 
#define _mm_insert_epi32(X, I, N) \ ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), (int)(I), \ (int)(N))) #ifdef __x86_64__ /// Constructs a 128-bit vector of [2 x i64] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 64-bit /// integer parameter \a I, using the immediate value parameter \a N as an /// insertion location selector. /// /// \headerfile /// /// \code /// __m128i _mm_insert_epi64(__m128i X, long long I, const int N); /// \endcode /// /// This intrinsic corresponds to the VPINSRQ / PINSRQ instruction. /// /// \param X /// A 128-bit integer vector of [2 x i64]. This vector is copied to the /// result and then one of the two elements in the result vector is replaced /// by \a I. /// \param I /// A 64-bit integer that is written to the result beginning at the offset /// specified by \a N. /// \param N /// An immediate value. Bit [0] specifies the bit offset in the result at /// which the integer \a I is written. \n /// 0: Bits [63:0] of the result are used for insertion. \n /// 1: Bits [127:64] of the result are used for insertion. \n /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi64(X, I, N) \ ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), (long long)(I), \ (int)(N))) #endif /* __x86_64__ */ /* Extract int from packed integer array at index. This returns the element * as a zero extended value, so it is unsigned. */ /// Extracts an 8-bit element from the 128-bit integer vector of /// [16 x i8], using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// int _mm_extract_epi8(__m128i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VPEXTRB / PEXTRB instruction. /// /// \param X /// A 128-bit integer vector. /// \param N /// An immediate value. Bits [3:0] specify which 8-bit vector element from /// the argument \a X to extract and copy to the result. 
\n /// 0000: Bits [7:0] of parameter \a X are extracted. \n /// 0001: Bits [15:8] of the parameter \a X are extracted. \n /// 0010: Bits [23:16] of the parameter \a X are extracted. \n /// 0011: Bits [31:24] of the parameter \a X are extracted. \n /// 0100: Bits [39:32] of the parameter \a X are extracted. \n /// 0101: Bits [47:40] of the parameter \a X are extracted. \n /// 0110: Bits [55:48] of the parameter \a X are extracted. \n /// 0111: Bits [63:56] of the parameter \a X are extracted. \n /// 1000: Bits [71:64] of the parameter \a X are extracted. \n /// 1001: Bits [79:72] of the parameter \a X are extracted. \n /// 1010: Bits [87:80] of the parameter \a X are extracted. \n /// 1011: Bits [95:88] of the parameter \a X are extracted. \n /// 1100: Bits [103:96] of the parameter \a X are extracted. \n /// 1101: Bits [111:104] of the parameter \a X are extracted. \n /// 1110: Bits [119:112] of the parameter \a X are extracted. \n /// 1111: Bits [127:120] of the parameter \a X are extracted. /// \returns An unsigned integer, whose lower 8 bits are selected from the /// 128-bit integer vector parameter and the remaining bits are assigned /// zeros. #define _mm_extract_epi8(X, N) \ ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \ (int)(N))) /// Extracts a 32-bit element from the 128-bit integer vector of /// [4 x i32], using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// int _mm_extract_epi32(__m128i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VPEXTRD / PEXTRD instruction. /// /// \param X /// A 128-bit integer vector. /// \param N /// An immediate value. Bits [1:0] specify which 32-bit vector element from /// the argument \a X to extract and copy to the result. \n /// 00: Bits [31:0] of the parameter \a X are extracted. \n /// 01: Bits [63:32] of the parameter \a X are extracted. \n /// 10: Bits [95:64] of the parameter \a X are extracted. 
\n /// 11: Bits [127:96] of the parameter \a X are exracted. /// \returns An integer, whose lower 32 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. #define _mm_extract_epi32(X, N) \ ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))) /// Extracts a 64-bit element from the 128-bit integer vector of /// [2 x i64], using the immediate value parameter \a N as a selector. /// /// \headerfile /// /// \code /// long long _mm_extract_epi64(__m128i X, const int N); /// \endcode /// /// This intrinsic corresponds to the VPEXTRQ / PEXTRQ instruction /// in 64-bit mode. /// /// \param X /// A 128-bit integer vector. /// \param N /// An immediate value. Bit [0] specifies which 64-bit vector element from /// the argument \a X to return. \n /// 0: Bits [63:0] are returned. \n /// 1: Bits [127:64] are returned. \n /// \returns A 64-bit integer. #define _mm_extract_epi64(X, N) \ ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))) /* SSE4 128-bit Packed Integer Comparisons. */ /// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param __M /// A 128-bit integer vector containing the bits to be tested. /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } /// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param __M /// A 128-bit integer vector containing the bits to be tested. /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. 
/// \returns TRUE if the specified bits are all ones; FALSE otherwise. static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } /// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// \headerfile /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param __M /// A 128-bit integer vector containing the bits to be tested. /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); } /// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// \headerfile /// /// \code /// int _mm_test_all_ones(__m128i V); /// \endcode /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param V /// A 128-bit integer vector containing the bits to be tested. /// \returns TRUE if the bits specified in the operand are all set to 1; FALSE /// otherwise. #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_set1_epi32(-1)) /// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// \headerfile /// /// \code /// int _mm_test_mix_ones_zeros(__m128i M, __m128i V); /// \endcode /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param M /// A 128-bit integer vector containing the bits to be tested. /// \param V /// A 128-bit integer vector selecting which bits to test in operand \a M. /// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. 
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) /// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// \headerfile /// /// \code /// int _mm_test_all_zeros(__m128i M, __m128i V); /// \endcode /// /// This intrinsic corresponds to the VPTEST / PTEST instruction. /// /// \param M /// A 128-bit integer vector containing the bits to be tested. /// \param V /// A 128-bit integer vector selecting which bits to test in operand \a M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. #define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V)) /* SSE4 64-bit Packed Integer Comparisons. */ /// Compares each of the corresponding 64-bit values of the 128-bit /// integer vectors for equality. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPEQQ / PCMPEQQ instruction. /// /// \param __V1 /// A 128-bit integer vector. /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 == (__v2di)__V2); } /* SSE4 Packed Integer Sign-Extension. */ /// Sign-extends each of the lower eight 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXBW / PMOVSXBW instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// sign-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } /// Sign-extends each of the lower four 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a /// 128-bit vector of [4 x i32]. The upper twelve elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXBD / PMOVSXBD instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); } /// Sign-extends each of the lower two 8-bit integer elements of a /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXBQ / PMOVSXBQ instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. 
*/ return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); } /// Sign-extends each of the lower four 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in /// a 128-bit vector of [4 x i32]. The upper four elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXWD / PMOVSXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// sign-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); } /// Sign-extends each of the lower two 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper six elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXWQ / PMOVSXWQ instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); } /// Sign-extends each of the lower two 32-bit integer elements of a /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVSXDQ / PMOVSXDQ instruction. /// /// \param __V /// A 128-bit vector of [4 x i32]. 
The lower two 32-bit elements are /// sign-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); } /* SSE4 Packed Integer Zero-Extension. */ /// Zero-extends each of the lower eight 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXBW / PMOVZXBW instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are /// zero-extended to 16-bit values. /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } /// Zero-extends each of the lower four 8-bit integer elements of a /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a /// 128-bit vector of [4 x i32]. The upper twelve elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXBD / PMOVZXBD instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } /// Zero-extends each of the lower two 8-bit integer elements of a /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXBQ / PMOVZXBQ instruction. /// /// \param __V /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } /// Zero-extends each of the lower four 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in /// a 128-bit vector of [4 x i32]. The upper four elements of the input /// vector are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXWD / PMOVZXWD instruction. /// /// \param __V /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are /// zero-extended to 32-bit values. /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } /// Zero-extends each of the lower two 16-bit integer elements of a /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper six elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXWQ / PMOVZXWQ instruction. 
/// /// \param __V /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); } /// Zero-extends each of the lower two 32-bit integer elements of a /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector /// are unused. /// /// \headerfile /// /// This intrinsic corresponds to the VPMOVZXDQ / PMOVZXDQ instruction. /// /// \param __V /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are /// zero-extended to 64-bit values. /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { return (__m128i) __builtin_convertvector( __builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); } /* SSE4 Pack with Unsigned Saturation. */ /// Converts 32-bit signed integers from both 128-bit integer vector /// operands into 16-bit unsigned integers, and returns the packed result. /// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than /// 0x0000 are saturated to 0x0000. /// /// \headerfile /// /// This intrinsic corresponds to the VPACKUSDW / PACKUSDW instruction. /// /// \param __V1 /// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a /// signed integer and is converted to a 16-bit unsigned integer with /// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values /// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values /// are written to the lower 64 bits of the result. /// \param __V2 /// A 128-bit vector of [4 x i32]. 
Each 32-bit element is treated as a /// signed integer and is converted to a 16-bit unsigned integer with /// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values /// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values /// are written to the higher 64 bits of the result. /// \returns A 128-bit vector of [8 x i16] containing the converted values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2) { return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); } /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Subtracts 8-bit unsigned integer values and computes the absolute /// values of the differences to the corresponding bits in the destination. /// Then sums of the absolute differences are returned according to the bit /// fields in the immediate operand. /// /// \headerfile /// /// \code /// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M); /// \endcode /// /// This intrinsic corresponds to the VMPSADBW / MPSADBW instruction. /// /// \param X /// A 128-bit vector of [16 x i8]. /// \param Y /// A 128-bit vector of [16 x i8]. /// \param M /// An 8-bit immediate operand specifying how the absolute differences are to /// be calculated, according to the following algorithm: /// \code /// // M2 represents bit 2 of the immediate operand /// // M10 represents bits [1:0] of the immediate operand /// i = M2 * 4; /// j = M10 * 4; /// for (k = 0; k < 8; k = k + 1) { /// d0 = abs(X[i + k + 0] - Y[j + 0]); /// d1 = abs(X[i + k + 1] - Y[j + 1]); /// d2 = abs(X[i + k + 2] - Y[j + 2]); /// d3 = abs(X[i + k + 3] - Y[j + 3]); /// r[k] = d0 + d1 + d2 + d3; /// } /// \endcode /// \returns A 128-bit integer vector containing the sums of the sets of /// absolute differences between both operands. 
#define _mm_mpsadbw_epu8(X, Y, M) \ ((__m128i)__builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (M))) /// Finds the minimum unsigned 16-bit element in the input 128-bit /// vector of [8 x u16] and returns it and along with its index. /// /// \headerfile /// /// This intrinsic corresponds to the VPHMINPOSUW / PHMINPOSUW /// instruction. /// /// \param __V /// A 128-bit vector of [8 x u16]. /// \returns A 128-bit value where bits [15:0] contain the minimum value found /// in parameter \a __V, bits [18:16] contain the index of the minimum value /// and the remaining bits are set to 0. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V); } /* Handle the sse4.2 definitions here. */ /* These definitions are normally in nmmintrin.h, but gcc puts them in here so we'll do the same. */ #undef __DEFAULT_FN_ATTRS #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) /* These specify the type of data that we're comparing. */ #define _SIDD_UBYTE_OPS 0x00 #define _SIDD_UWORD_OPS 0x01 #define _SIDD_SBYTE_OPS 0x02 #define _SIDD_SWORD_OPS 0x03 /* These specify the type of comparison operation. */ #define _SIDD_CMP_EQUAL_ANY 0x00 #define _SIDD_CMP_RANGES 0x04 #define _SIDD_CMP_EQUAL_EACH 0x08 #define _SIDD_CMP_EQUAL_ORDERED 0x0c /* These macros specify the polarity of the operation. */ #define _SIDD_POSITIVE_POLARITY 0x00 #define _SIDD_NEGATIVE_POLARITY 0x10 #define _SIDD_MASKED_POSITIVE_POLARITY 0x20 #define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 /* These macros are used in _mm_cmpXstri() to specify the return. */ #define _SIDD_LEAST_SIGNIFICANT 0x00 #define _SIDD_MOST_SIGNIFICANT 0x40 /* These macros are used in _mm_cmpXstri() to specify the return. */ #define _SIDD_BIT_MASK 0x00 #define _SIDD_UNIT_MASK 0x40 /* SSE4.2 Packed Comparison Intrinsics. 
*/ /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns a 128-bit integer vector representing the result /// mask of the comparison. /// /// \headerfile /// /// \code /// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRM / PCMPISTRM /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the result is zero-extended or expanded to 16 /// bytes. \n /// 0: The result is zero-extended to 16 bytes. 
\n /// 1: The result is expanded to 16 bytes (this expansion is performed by /// repeating each bit 8 or 16 times). /// \returns Returns a 128-bit integer vector representing the result mask of /// the comparison. #define _mm_cmpistrm(A, B, M) \ ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns an integer representing the result index of the /// comparison. /// /// \headerfile /// /// \code /// int _mm_cmpistri(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. 
\n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the index of the lowest set bit or the /// highest set bit is returned. \n /// 0: The index of the least significant set bit. \n /// 1: The index of the most significant set bit. \n /// \returns Returns an integer representing the result index of the comparison. #define _mm_cmpistri(A, B, M) \ ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns a 128-bit integer vector representing the result /// mask of the comparison. /// /// \headerfile /// /// \code /// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRM / PCMPESTRM /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. 
The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the result is zero-extended or expanded to 16 /// bytes. \n /// 0: The result is zero-extended to 16 bytes. \n /// 1: The result is expanded to 16 bytes (this expansion is performed by /// repeating each bit 8 or 16 times). \n /// \returns Returns a 128-bit integer vector representing the result mask of /// the comparison. #define _mm_cmpestrm(A, LA, B, LB, M) \ ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns an integer representing the result index of the /// comparison. /// /// \headerfile /// /// \code /// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. 
/// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words, the type of comparison to perform, and the format of the return /// value. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// Bit [6]: Determines whether the index of the lowest set bit or the /// highest set bit is returned. \n /// 0: The index of the least significant set bit. \n /// 1: The index of the most significant set bit. \n /// \returns Returns an integer representing the result index of the comparison. #define _mm_cmpestri(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the /// string in \a B is the maximum, otherwise, returns 0. 
/// /// \headerfile /// /// \code /// int _mm_cmpistra(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the bit mask is zero and the length of the string in /// \a B is the maximum; otherwise, returns 0. #define _mm_cmpistra(A, B, M) \ ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. 
Returns 1 if the bit mask is non-zero, otherwise, returns /// 0. /// /// \headerfile /// /// \code /// int _mm_cmpistrc(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0. #define _mm_cmpistrc(A, B, M) \ ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. 
Returns bit 0 of the resulting bit mask. /// /// \headerfile /// /// \code /// int _mm_cmpistro(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns bit 0 of the resulting bit mask. #define _mm_cmpistro(A, B, M) \ ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. 
Returns 1 if the length of the string in \a A is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpistrs(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the length of the string in \a A is less than the /// maximum, otherwise, returns 0. 
#define _mm_cmpistrs(A, B, M) \ ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with implicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a B is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpistrz(__m128i A, __m128i B, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPISTRI / PCMPISTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. 
/// \returns Returns 1 if the length of the string in \a B is less than the /// maximum, otherwise, returns 0. #define _mm_cmpistrz(A, B, M) \ ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the /// string in \a B is the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. 
\n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the bit mask is zero and the length of the string in /// \a B is the maximum, otherwise, returns 0. #define _mm_cmpestra(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise, /// returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. 
The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0. #define _mm_cmpestrc(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns bit 0 of the resulting bit mask. /// /// \headerfile /// /// \code /// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. 
\n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns bit 0 of the resulting bit mask. #define _mm_cmpestro(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a A is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI / PCMPESTRI /// instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. /// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. 
\n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement in the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. \n /// \returns Returns 1 if the length of the string in \a A is less than the /// maximum, otherwise, returns 0. #define _mm_cmpestrs(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /// Uses the immediate operand \a M to perform a comparison of string /// data with explicitly defined lengths that is contained in source operands /// \a A and \a B. Returns 1 if the length of the string in \a B is less than /// the maximum, otherwise, returns 0. /// /// \headerfile /// /// \code /// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M); /// \endcode /// /// This intrinsic corresponds to the VPCMPESTRI instruction. /// /// \param A /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LA /// An integer that specifies the length of the string in \a A. 
/// \param B /// A 128-bit integer vector containing one of the source operands to be /// compared. /// \param LB /// An integer that specifies the length of the string in \a B. /// \param M /// An 8-bit immediate operand specifying whether the characters are bytes or /// words and the type of comparison to perform. \n /// Bits [1:0]: Determine source data format. \n /// 00: 16 unsigned bytes \n /// 01: 8 unsigned words \n /// 10: 16 signed bytes \n /// 11: 8 signed words \n /// Bits [3:2]: Determine comparison type and aggregation method. \n /// 00: Subset: Each character in \a B is compared for equality with all /// the characters in \a A. \n /// 01: Ranges: Each character in \a B is compared to \a A. The comparison /// basis is greater than or equal for even-indexed elements in \a A, /// and less than or equal for odd-indexed elements in \a A. \n /// 10: Match: Compare each pair of corresponding characters in \a A and /// \a B for equality. \n /// 11: Substring: Search \a B for substring matches of \a A. \n /// Bits [5:4]: Determine whether to perform a one's complement on the bit /// mask of the comparison results. \n /// 00: No effect. \n /// 01: Negate the bit mask. \n /// 10: No effect. \n /// 11: Negate the bit mask only for bits with an index less than or equal /// to the size of \a A or \a B. /// \returns Returns 1 if the length of the string in \a B is less than the /// maximum, otherwise, returns 0. #define _mm_cmpestrz(A, LA, B, LB, M) \ ((int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \ (__v16qi)(__m128i)(B), (int)(LB), \ (int)(M))) /* SSE4.2 Compare Packed Data -- Greater Than. */ /// Compares each of the corresponding 64-bit values of the 128-bit /// integer vectors to determine if the values in the first operand are /// greater than those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VPCMPGTQ / PCMPGTQ instruction. /// /// \param __V1 /// A 128-bit integer vector. 
/// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the comparison results. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1, __m128i __V2) { return (__m128i)((__v2di)__V1 > (__v2di)__V2); } #undef __DEFAULT_FN_ATTRS #include #include #endif /* __SMMINTRIN_H */ /*===---- stdalign.h - Standard header for alignment ------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDALIGN_H #define __STDALIGN_H /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__cplusplus) || \ (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) #ifndef __cplusplus #define alignas _Alignas #define alignof _Alignof #endif #define __alignas_is_defined 1 #define __alignof_is_defined 1 #endif /* __STDC_VERSION__ */ #endif /* __STDALIGN_H */ /*===---- stdarg.h - Variable argument handling ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDARG_H #ifndef __GNUC_VA_LIST #define __GNUC_VA_LIST typedef __builtin_va_list __gnuc_va_list; #endif #ifdef __need___va_list #undef __need___va_list #else #define __STDARG_H #ifndef _VA_LIST typedef __builtin_va_list va_list; #define _VA_LIST #endif /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L /* C2x does not require the second parameter for va_start. */ #define va_start(ap, ...) __builtin_va_start(ap, 0) #else /* Versions before C2x do require the second parameter. */ #define va_start(ap, param) __builtin_va_start(ap, param) #endif #define va_end(ap) __builtin_va_end(ap) #define va_arg(ap, type) __builtin_va_arg(ap, type) /* GCC always defines __va_copy, but does not define va_copy unless in c99 mode * or -ansi is not specified, since it was not part of C90. */ #define __va_copy(d,s) __builtin_va_copy(d,s) #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ !defined(__STRICT_ANSI__) #define va_copy(dest, src) __builtin_va_copy(dest, src) #endif #endif /* __STDARG_H */ #endif /* not __STDARG_H */ /*===---- stdatomic.h - Standard header for atomic types and operations -----=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __CLANG_STDATOMIC_H #define __CLANG_STDATOMIC_H /* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for * example, already has a Clang-compatible stdatomic.h header. * * Exclude the MSVC path as well as the MSVC header as of the 14.31.30818 * explicitly disallows `stdatomic.h` in the C mode via an `#error`. Fallback * to the clang resource header until that is fully supported. The * `stdatomic.h` header requires C++ 23 or newer. 
*/ #if __STDC_HOSTED__ && \ __has_include_next() && \ (!defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 202002L)) # include_next #else #include #include #ifdef __cplusplus extern "C" { #endif /* 7.17.1 Introduction */ #define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE #define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE #define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE #define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE #define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE #define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE #define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE #define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE #define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE #define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE /* 7.17.2 Initialization */ /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202000L) || \ defined(__cplusplus) /* ATOMIC_VAR_INIT was removed in C2x, but still remains in C++23. */ #define ATOMIC_VAR_INIT(value) (value) #endif #if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L && \ __STDC_VERSION__ < 202000L) || \ (defined(__cplusplus) && __cplusplus >= 202002L)) && \ !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) /* ATOMIC_VAR_INIT was deprecated in C17 and C++20. 
*/ #pragma clang deprecated(ATOMIC_VAR_INIT) #endif #define atomic_init __c11_atomic_init /* 7.17.3 Order and consistency */ typedef enum memory_order { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_consume = __ATOMIC_CONSUME, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, memory_order_seq_cst = __ATOMIC_SEQ_CST } memory_order; #define kill_dependency(y) (y) /* 7.17.4 Fences */ /* These should be provided by the libc implementation. */ void atomic_thread_fence(memory_order); void atomic_signal_fence(memory_order); #define atomic_thread_fence(order) __c11_atomic_thread_fence(order) #define atomic_signal_fence(order) __c11_atomic_signal_fence(order) /* 7.17.5 Lock-free property */ #define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj))) /* 7.17.6 Atomic integer types */ #ifdef __cplusplus typedef _Atomic(bool) atomic_bool; #else typedef _Atomic(_Bool) atomic_bool; #endif typedef _Atomic(char) atomic_char; typedef _Atomic(signed char) atomic_schar; typedef _Atomic(unsigned char) atomic_uchar; typedef _Atomic(short) atomic_short; typedef _Atomic(unsigned short) atomic_ushort; typedef _Atomic(int) atomic_int; typedef _Atomic(unsigned int) atomic_uint; typedef _Atomic(long) atomic_long; typedef _Atomic(unsigned long) atomic_ulong; typedef _Atomic(long long) atomic_llong; typedef _Atomic(unsigned long long) atomic_ullong; typedef _Atomic(uint_least16_t) atomic_char16_t; typedef _Atomic(uint_least32_t) atomic_char32_t; typedef _Atomic(wchar_t) atomic_wchar_t; typedef _Atomic(int_least8_t) atomic_int_least8_t; typedef _Atomic(uint_least8_t) atomic_uint_least8_t; typedef _Atomic(int_least16_t) atomic_int_least16_t; typedef _Atomic(uint_least16_t) atomic_uint_least16_t; typedef _Atomic(int_least32_t) atomic_int_least32_t; typedef _Atomic(uint_least32_t) atomic_uint_least32_t; typedef _Atomic(int_least64_t) atomic_int_least64_t; typedef _Atomic(uint_least64_t) 
atomic_uint_least64_t; typedef _Atomic(int_fast8_t) atomic_int_fast8_t; typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; typedef _Atomic(int_fast16_t) atomic_int_fast16_t; typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; typedef _Atomic(int_fast32_t) atomic_int_fast32_t; typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; typedef _Atomic(int_fast64_t) atomic_int_fast64_t; typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; typedef _Atomic(intptr_t) atomic_intptr_t; typedef _Atomic(uintptr_t) atomic_uintptr_t; typedef _Atomic(size_t) atomic_size_t; typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; typedef _Atomic(intmax_t) atomic_intmax_t; typedef _Atomic(uintmax_t) atomic_uintmax_t; /* 7.17.7 Operations on atomic types */ #define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST) #define atomic_store_explicit __c11_atomic_store #define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST) #define atomic_load_explicit __c11_atomic_load #define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST) #define atomic_exchange_explicit __c11_atomic_exchange #define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) #define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong #define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) #define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak #define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST) #define atomic_fetch_add_explicit __c11_atomic_fetch_add #define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST) #define atomic_fetch_sub_explicit __c11_atomic_fetch_sub #define atomic_fetch_or(object, operand) 
__c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST) #define atomic_fetch_or_explicit __c11_atomic_fetch_or #define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST) #define atomic_fetch_xor_explicit __c11_atomic_fetch_xor #define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST) #define atomic_fetch_and_explicit __c11_atomic_fetch_and /* 7.17.8 Atomic flag type and operations */ typedef struct atomic_flag { atomic_bool _Value; } atomic_flag; #define ATOMIC_FLAG_INIT { 0 } /* These should be provided by the libc implementation. */ #ifdef __cplusplus bool atomic_flag_test_and_set(volatile atomic_flag *); bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); #else _Bool atomic_flag_test_and_set(volatile atomic_flag *); _Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); #endif void atomic_flag_clear(volatile atomic_flag *); void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order); #define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST) #define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order) #define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST) #define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order) #ifdef __cplusplus } #endif #endif /* __STDC_HOSTED__ */ #endif /* __CLANG_STDATOMIC_H */ /*===---- stdbool.h - Standard header for booleans -------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDBOOL_H #define __STDBOOL_H #define __bool_true_false_are_defined 1 #if defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L /* FIXME: We should be issuing a deprecation warning here, but cannot yet due * to system headers which include this header file unconditionally. */ #elif !defined(__cplusplus) #define bool _Bool #define true 1 #define false 0 #elif defined(__GNUC__) && !defined(__STRICT_ANSI__) /* Define _Bool as a GNU extension. */ #define _Bool bool #if defined(__cplusplus) && __cplusplus < 201103L /* For C++98, define bool, false, true as a GNU extension. */ #define bool bool #define false false #define true true #endif #endif #endif /* __STDBOOL_H */ /*===---- stddef.h - Basic type definitions --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ defined(__need_size_t) || defined(__need_wchar_t) || \ defined(__need_NULL) || defined(__need_wint_t) #if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ !defined(__need_wchar_t) && !defined(__need_NULL) && \ !defined(__need_wint_t) /* Always define miscellaneous pieces when modules are available. */ #if !__has_feature(modules) #define __STDDEF_H #endif #define __need_ptrdiff_t #define __need_size_t #define __need_wchar_t #define __need_NULL #define __need_STDDEF_H_misc /* __need_wint_t is intentionally not defined here. */ #endif #if defined(__need_ptrdiff_t) #if !defined(_PTRDIFF_T) || __has_feature(modules) /* Always define ptrdiff_t when modules are available. 
*/ #if !__has_feature(modules) #define _PTRDIFF_T #endif typedef __PTRDIFF_TYPE__ ptrdiff_t; #endif #undef __need_ptrdiff_t #endif /* defined(__need_ptrdiff_t) */ #if defined(__need_size_t) #if !defined(_SIZE_T) || __has_feature(modules) /* Always define size_t when modules are available. */ #if !__has_feature(modules) #define _SIZE_T #endif typedef __SIZE_TYPE__ size_t; #endif #undef __need_size_t #endif /*defined(__need_size_t) */ #if defined(__need_STDDEF_H_misc) /* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is * enabled. */ #if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ !defined(_RSIZE_T)) || __has_feature(modules) /* Always define rsize_t when modules are available. */ #if !__has_feature(modules) #define _RSIZE_T #endif typedef __SIZE_TYPE__ rsize_t; #endif #endif /* defined(__need_STDDEF_H_misc) */ #if defined(__need_wchar_t) #if !defined(__cplusplus) || (defined(_MSC_VER) && !_NATIVE_WCHAR_T_DEFINED) /* Always define wchar_t when modules are available. */ #if !defined(_WCHAR_T) || __has_feature(modules) #if !__has_feature(modules) #define _WCHAR_T #if defined(_MSC_EXTENSIONS) #define _WCHAR_T_DEFINED #endif #endif typedef __WCHAR_TYPE__ wchar_t; #endif #endif #undef __need_wchar_t #endif /* defined(__need_wchar_t) */ #if defined(__need_NULL) #undef NULL #ifdef __cplusplus # if !defined(__MINGW32__) && !defined(_MSC_VER) # define NULL __null # else # define NULL 0 # endif #else # define NULL ((void*)0) #endif #ifdef __cplusplus #if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) namespace std { typedef decltype(nullptr) nullptr_t; } using ::std::nullptr_t; #endif #endif #undef __need_NULL #endif /* defined(__need_NULL) */ /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L typedef typeof(nullptr) nullptr_t; #endif /* defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L */ #if defined(__need_STDDEF_H_misc) && defined(__STDC_VERSION__) && \ __STDC_VERSION__ >= 202000L #define unreachable() __builtin_unreachable() #endif /* defined(__need_STDDEF_H_misc) && >= C23 */ #if defined(__need_STDDEF_H_misc) #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) #include "__stddef_max_align_t.h" #endif #define offsetof(t, d) __builtin_offsetof(t, d) #undef __need_STDDEF_H_misc #endif /* defined(__need_STDDEF_H_misc) */ /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ #if defined(__need_wint_t) /* Always define wint_t when modules are available. */ #if !defined(_WINT_T) || __has_feature(modules) #if !__has_feature(modules) #define _WINT_T #endif typedef __WINT_TYPE__ wint_t; #endif #undef __need_wint_t #endif /* __need_wint_t */ #endif /*===---- stdint.h - Standard header for sized integer types --------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_STDINT_H2 // AIX system headers need stdint.h to be re-enterable while _STD_TYPES_T // is defined until an inclusion of it without _STD_TYPES_T occurs, in which // case the header guard macro is defined. #if !defined(_AIX) || !defined(_STD_TYPES_T) || !defined(__STDC_HOSTED__) #define __CLANG_STDINT_H2 #endif /* If we're hosted, fall back to the system's stdint.h, which might have * additional definitions. 
*/ #if __STDC_HOSTED__ && __has_include_next() // C99 7.18.3 Limits of other integer types // // Footnote 219, 220: C++ implementations should define these macros only when // __STDC_LIMIT_MACROS is defined before is included. // // Footnote 222: C++ implementations should define these macros only when // __STDC_CONSTANT_MACROS is defined before is included. // // C++11 [cstdint.syn]p2: // // The macros defined by are provided unconditionally. In particular, // the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in // footnotes 219, 220, and 222 in the C standard) play no role in C++. // // C11 removed the problematic footnotes. // // Work around this inconsistency by always defining those macros in C++ mode, // so that a C library implementation which follows the C99 standard can be // used in C++. # ifdef __cplusplus # if !defined(__STDC_LIMIT_MACROS) # define __STDC_LIMIT_MACROS # define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG # endif # if !defined(__STDC_CONSTANT_MACROS) # define __STDC_CONSTANT_MACROS # define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG # endif # endif # include_next # ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG # undef __STDC_LIMIT_MACROS # undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG # endif # ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG # undef __STDC_CONSTANT_MACROS # undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG # endif #else /* C99 7.18.1.1 Exact-width integer types. * C99 7.18.1.2 Minimum-width integer types. * C99 7.18.1.3 Fastest minimum-width integer types. * * The standard requires that exact-width type be defined for 8-, 16-, 32-, and * 64-bit types if they are implemented. Other exact width types are optional. * This implementation defines an exact-width types for every integer width * that is represented in the standard integer types. * * The standard also requires minimum-width types be defined for 8-, 16-, 32-, * and 64-bit widths regardless of whether there are corresponding exact-width * types. 
* * To accommodate targets that are missing types that are exactly 8, 16, 32, or * 64 bits wide, this implementation takes an approach of cascading * redefinitions, redefining __int_leastN_t to successively smaller exact-width * types. It is therefore important that the types are defined in order of * descending widths. * * We currently assume that the minimum-width types and the fastest * minimum-width types are the same. This is allowed by the standard, but is * suboptimal. * * In violation of the standard, some targets do not implement a type that is * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit). * To accommodate these targets, a required minimum-width type is only * defined if there exists an exact-width type of equal or greater width. */ #ifdef __INT64_TYPE__ # ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/ typedef __INT64_TYPE__ int64_t; # endif /* __int8_t_defined */ typedef __UINT64_TYPE__ uint64_t; # undef __int_least64_t # define __int_least64_t int64_t # undef __uint_least64_t # define __uint_least64_t uint64_t # undef __int_least32_t # define __int_least32_t int64_t # undef __uint_least32_t # define __uint_least32_t uint64_t # undef __int_least16_t # define __int_least16_t int64_t # undef __uint_least16_t # define __uint_least16_t uint64_t # undef __int_least8_t # define __int_least8_t int64_t # undef __uint_least8_t # define __uint_least8_t uint64_t #endif /* __INT64_TYPE__ */ #ifdef __int_least64_t typedef __int_least64_t int_least64_t; typedef __uint_least64_t uint_least64_t; typedef __int_least64_t int_fast64_t; typedef __uint_least64_t uint_fast64_t; #endif /* __int_least64_t */ #ifdef __INT56_TYPE__ typedef __INT56_TYPE__ int56_t; typedef __UINT56_TYPE__ uint56_t; typedef int56_t int_least56_t; typedef uint56_t uint_least56_t; typedef int56_t int_fast56_t; typedef uint56_t uint_fast56_t; # undef __int_least32_t # define __int_least32_t int56_t # undef __uint_least32_t # define __uint_least32_t 
uint56_t # undef __int_least16_t # define __int_least16_t int56_t # undef __uint_least16_t # define __uint_least16_t uint56_t # undef __int_least8_t # define __int_least8_t int56_t # undef __uint_least8_t # define __uint_least8_t uint56_t #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ typedef __INT48_TYPE__ int48_t; typedef __UINT48_TYPE__ uint48_t; typedef int48_t int_least48_t; typedef uint48_t uint_least48_t; typedef int48_t int_fast48_t; typedef uint48_t uint_fast48_t; # undef __int_least32_t # define __int_least32_t int48_t # undef __uint_least32_t # define __uint_least32_t uint48_t # undef __int_least16_t # define __int_least16_t int48_t # undef __uint_least16_t # define __uint_least16_t uint48_t # undef __int_least8_t # define __int_least8_t int48_t # undef __uint_least8_t # define __uint_least8_t uint48_t #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ typedef __INT40_TYPE__ int40_t; typedef __UINT40_TYPE__ uint40_t; typedef int40_t int_least40_t; typedef uint40_t uint_least40_t; typedef int40_t int_fast40_t; typedef uint40_t uint_fast40_t; # undef __int_least32_t # define __int_least32_t int40_t # undef __uint_least32_t # define __uint_least32_t uint40_t # undef __int_least16_t # define __int_least16_t int40_t # undef __uint_least16_t # define __uint_least16_t uint40_t # undef __int_least8_t # define __int_least8_t int40_t # undef __uint_least8_t # define __uint_least8_t uint40_t #endif /* __INT40_TYPE__ */ #ifdef __INT32_TYPE__ # ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/ typedef __INT32_TYPE__ int32_t; # endif /* __int8_t_defined */ # ifndef __uint32_t_defined /* more glibc compatibility */ # define __uint32_t_defined typedef __UINT32_TYPE__ uint32_t; # endif /* __uint32_t_defined */ # undef __int_least32_t # define __int_least32_t int32_t # undef __uint_least32_t # define __uint_least32_t uint32_t # undef __int_least16_t # define __int_least16_t int32_t # undef __uint_least16_t # define __uint_least16_t uint32_t # undef 
__int_least8_t # define __int_least8_t int32_t # undef __uint_least8_t # define __uint_least8_t uint32_t #endif /* __INT32_TYPE__ */ #ifdef __int_least32_t typedef __int_least32_t int_least32_t; typedef __uint_least32_t uint_least32_t; typedef __int_least32_t int_fast32_t; typedef __uint_least32_t uint_fast32_t; #endif /* __int_least32_t */ #ifdef __INT24_TYPE__ typedef __INT24_TYPE__ int24_t; typedef __UINT24_TYPE__ uint24_t; typedef int24_t int_least24_t; typedef uint24_t uint_least24_t; typedef int24_t int_fast24_t; typedef uint24_t uint_fast24_t; # undef __int_least16_t # define __int_least16_t int24_t # undef __uint_least16_t # define __uint_least16_t uint24_t # undef __int_least8_t # define __int_least8_t int24_t # undef __uint_least8_t # define __uint_least8_t uint24_t #endif /* __INT24_TYPE__ */ #ifdef __INT16_TYPE__ #ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/ typedef __INT16_TYPE__ int16_t; #endif /* __int8_t_defined */ typedef __UINT16_TYPE__ uint16_t; # undef __int_least16_t # define __int_least16_t int16_t # undef __uint_least16_t # define __uint_least16_t uint16_t # undef __int_least8_t # define __int_least8_t int16_t # undef __uint_least8_t # define __uint_least8_t uint16_t #endif /* __INT16_TYPE__ */ #ifdef __int_least16_t typedef __int_least16_t int_least16_t; typedef __uint_least16_t uint_least16_t; typedef __int_least16_t int_fast16_t; typedef __uint_least16_t uint_fast16_t; #endif /* __int_least16_t */ #ifdef __INT8_TYPE__ #ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/ typedef __INT8_TYPE__ int8_t; #endif /* __int8_t_defined */ typedef __UINT8_TYPE__ uint8_t; # undef __int_least8_t # define __int_least8_t int8_t # undef __uint_least8_t # define __uint_least8_t uint8_t #endif /* __INT8_TYPE__ */ #ifdef __int_least8_t typedef __int_least8_t int_least8_t; typedef __uint_least8_t uint_least8_t; typedef __int_least8_t int_fast8_t; typedef __uint_least8_t uint_fast8_t; #endif /* __int_least8_t */ /* prevent 
glibc sys/types.h from defining conflicting types */ #ifndef __int8_t_defined # define __int8_t_defined #endif /* __int8_t_defined */ /* C99 7.18.1.4 Integer types capable of holding object pointers. */ #define __stdint_join3(a,b,c) a ## b ## c #ifndef _INTPTR_T #ifndef __intptr_t_defined typedef __INTPTR_TYPE__ intptr_t; #define __intptr_t_defined #define _INTPTR_T #endif #endif #ifndef _UINTPTR_T typedef __UINTPTR_TYPE__ uintptr_t; #define _UINTPTR_T #endif /* C99 7.18.1.5 Greatest-width integer types. */ typedef __INTMAX_TYPE__ intmax_t; typedef __UINTMAX_TYPE__ uintmax_t; /* C99 7.18.4 Macros for minimum-width integer constants. * * The standard requires that integer constant macros be defined for all the * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width * types are required, the corresponding integer constant macros are defined * here. This implementation also defines minimum-width types for every other * integer width that the target implements, so corresponding macros are * defined below, too. * * These macros are defined using the same successive-shrinking approach as * the type definitions above. It is likewise important that macros are defined * in order of decending width. * * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). 
*/ #define __int_c_join(a, b) a ## b #define __int_c(v, suffix) __int_c_join(v, suffix) #define __uint_c(v, suffix) __int_c_join(v##U, suffix) #ifdef __INT64_TYPE__ # undef __int64_c_suffix # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT64_C_SUFFIX__ # define __int64_c_suffix __INT64_C_SUFFIX__ # define __int32_c_suffix __INT64_C_SUFFIX__ # define __int16_c_suffix __INT64_C_SUFFIX__ # define __int8_c_suffix __INT64_C_SUFFIX__ # endif /* __INT64_C_SUFFIX__ */ #endif /* __INT64_TYPE__ */ #ifdef __int_least64_t # ifdef __int64_c_suffix # define INT64_C(v) __int_c(v, __int64_c_suffix) # define UINT64_C(v) __uint_c(v, __int64_c_suffix) # else # define INT64_C(v) v # define UINT64_C(v) v ## U # endif /* __int64_c_suffix */ #endif /* __int_least64_t */ #ifdef __INT56_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT56_C_SUFFIX__ # define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__) # define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__) # define __int32_c_suffix __INT56_C_SUFFIX__ # define __int16_c_suffix __INT56_C_SUFFIX__ # define __int8_c_suffix __INT56_C_SUFFIX__ # else # define INT56_C(v) v # define UINT56_C(v) v ## U # endif /* __INT56_C_SUFFIX__ */ #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT48_C_SUFFIX__ # define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__) # define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__) # define __int32_c_suffix __INT48_C_SUFFIX__ # define __int16_c_suffix __INT48_C_SUFFIX__ # define __int8_c_suffix __INT48_C_SUFFIX__ # else # define INT48_C(v) v # define UINT48_C(v) v ## U # endif /* __INT48_C_SUFFIX__ */ #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT40_C_SUFFIX__ # define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__) # define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__) # define 
__int32_c_suffix __INT40_C_SUFFIX__ # define __int16_c_suffix __INT40_C_SUFFIX__ # define __int8_c_suffix __INT40_C_SUFFIX__ # else # define INT40_C(v) v # define UINT40_C(v) v ## U # endif /* __INT40_C_SUFFIX__ */ #endif /* __INT40_TYPE__ */ #ifdef __INT32_TYPE__ # undef __int32_c_suffix # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT32_C_SUFFIX__ # define __int32_c_suffix __INT32_C_SUFFIX__ # define __int16_c_suffix __INT32_C_SUFFIX__ # define __int8_c_suffix __INT32_C_SUFFIX__ # endif /* __INT32_C_SUFFIX__ */ #endif /* __INT32_TYPE__ */ #ifdef __int_least32_t # ifdef __int32_c_suffix # define INT32_C(v) __int_c(v, __int32_c_suffix) # define UINT32_C(v) __uint_c(v, __int32_c_suffix) # else # define INT32_C(v) v # define UINT32_C(v) v ## U # endif /* __int32_c_suffix */ #endif /* __int_least32_t */ #ifdef __INT24_TYPE__ # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT24_C_SUFFIX__ # define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__) # define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__) # define __int16_c_suffix __INT24_C_SUFFIX__ # define __int8_c_suffix __INT24_C_SUFFIX__ # else # define INT24_C(v) v # define UINT24_C(v) v ## U # endif /* __INT24_C_SUFFIX__ */ #endif /* __INT24_TYPE__ */ #ifdef __INT16_TYPE__ # undef __int16_c_suffix # undef __int8_c_suffix # ifdef __INT16_C_SUFFIX__ # define __int16_c_suffix __INT16_C_SUFFIX__ # define __int8_c_suffix __INT16_C_SUFFIX__ # endif /* __INT16_C_SUFFIX__ */ #endif /* __INT16_TYPE__ */ #ifdef __int_least16_t # ifdef __int16_c_suffix # define INT16_C(v) __int_c(v, __int16_c_suffix) # define UINT16_C(v) __uint_c(v, __int16_c_suffix) # else # define INT16_C(v) v # define UINT16_C(v) v ## U # endif /* __int16_c_suffix */ #endif /* __int_least16_t */ #ifdef __INT8_TYPE__ # undef __int8_c_suffix # ifdef __INT8_C_SUFFIX__ # define __int8_c_suffix __INT8_C_SUFFIX__ # endif /* __INT8_C_SUFFIX__ */ #endif /* __INT8_TYPE__ */ #ifdef __int_least8_t # ifdef __int8_c_suffix # define INT8_C(v) __int_c(v, 
__int8_c_suffix) # define UINT8_C(v) __uint_c(v, __int8_c_suffix) # else # define INT8_C(v) v # define UINT8_C(v) v ## U # endif /* __int8_c_suffix */ #endif /* __int_least8_t */ /* C99 7.18.2.1 Limits of exact-width integer types. * C99 7.18.2.2 Limits of minimum-width integer types. * C99 7.18.2.3 Limits of fastest minimum-width integer types. * * The presence of limit macros are completely optional in C99. This * implementation defines limits for all of the types (exact- and * minimum-width) that it defines above, using the limits of the minimum-width * type for any types that do not have exact-width representations. * * As in the type definitions, this section takes an approach of * successive-shrinking to determine which limits to use for the standard (8, * 16, 32, 64) bit widths when they don't have exact representations. It is * therefore important that the definitions be kept in order of decending * widths. * * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). */ #ifdef __INT64_TYPE__ # define INT64_MAX INT64_C( 9223372036854775807) # define INT64_MIN (-INT64_C( 9223372036854775807)-1) # define UINT64_MAX UINT64_C(18446744073709551615) /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT64_WIDTH 64 # define INT64_WIDTH UINT64_WIDTH # define __UINT_LEAST64_WIDTH UINT64_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT64_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT64_WIDTH # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT64_MAX #endif /* __STDC_VERSION__ */ # define __INT_LEAST64_MIN INT64_MIN # define __INT_LEAST64_MAX INT64_MAX # define __UINT_LEAST64_MAX UINT64_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT64_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT64_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT64_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT64_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT64_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT64_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT64_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT64_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT64_MAX #endif /* __INT64_TYPE__ */ #ifdef __INT_LEAST64_MIN # define INT_LEAST64_MIN __INT_LEAST64_MIN # define INT_LEAST64_MAX __INT_LEAST64_MAX # define UINT_LEAST64_MAX __UINT_LEAST64_MAX # define INT_FAST64_MIN __INT_LEAST64_MIN # define INT_FAST64_MAX __INT_LEAST64_MAX # define UINT_FAST64_MAX __UINT_LEAST64_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT_LEAST64_WIDTH __UINT_LEAST64_WIDTH # define INT_LEAST64_WIDTH UINT_LEAST64_WIDTH # define UINT_FAST64_WIDTH __UINT_LEAST64_WIDTH # define INT_FAST64_WIDTH UINT_FAST64_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST64_MIN */ #ifdef __INT56_TYPE__ # define INT56_MAX INT56_C(36028797018963967) # define INT56_MIN (-INT56_C(36028797018963967)-1) # define UINT56_MAX UINT56_C(72057594037927935) # define INT_LEAST56_MIN INT56_MIN # define INT_LEAST56_MAX INT56_MAX # define UINT_LEAST56_MAX UINT56_MAX # define INT_FAST56_MIN INT56_MIN # define INT_FAST56_MAX INT56_MAX # define UINT_FAST56_MAX UINT56_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT56_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT56_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT56_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT56_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT56_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT56_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT56_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT56_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT56_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT56_WIDTH 56 # define INT56_WIDTH UINT56_WIDTH # define UINT_LEAST56_WIDTH UINT56_WIDTH # define INT_LEAST56_WIDTH UINT_LEAST56_WIDTH # define UINT_FAST56_WIDTH UINT56_WIDTH # define INT_FAST56_WIDTH UINT_FAST56_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT56_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT56_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT56_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ # define INT48_MAX INT48_C(140737488355327) # define INT48_MIN (-INT48_C(140737488355327)-1) # define UINT48_MAX UINT48_C(281474976710655) # define INT_LEAST48_MIN INT48_MIN # define INT_LEAST48_MAX INT48_MAX # define UINT_LEAST48_MAX UINT48_MAX # define INT_FAST48_MIN INT48_MIN # define INT_FAST48_MAX INT48_MAX # define UINT_FAST48_MAX UINT48_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT48_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT48_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT48_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT48_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT48_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT48_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT48_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT48_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT48_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L #define UINT48_WIDTH 48 #define INT48_WIDTH UINT48_WIDTH #define UINT_LEAST48_WIDTH UINT48_WIDTH #define INT_LEAST48_WIDTH UINT_LEAST48_WIDTH #define UINT_FAST48_WIDTH UINT48_WIDTH #define INT_FAST48_WIDTH UINT_FAST48_WIDTH #undef __UINT_LEAST32_WIDTH #define __UINT_LEAST32_WIDTH UINT48_WIDTH # undef __UINT_LEAST16_WIDTH #define __UINT_LEAST16_WIDTH UINT48_WIDTH # undef __UINT_LEAST8_WIDTH #define __UINT_LEAST8_WIDTH UINT48_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ # define INT40_MAX INT40_C(549755813887) # define INT40_MIN (-INT40_C(549755813887)-1) # define UINT40_MAX UINT40_C(1099511627775) # define INT_LEAST40_MIN INT40_MIN # define INT_LEAST40_MAX INT40_MAX # define UINT_LEAST40_MAX UINT40_MAX # define INT_FAST40_MIN INT40_MIN # define INT_FAST40_MAX INT40_MAX # define UINT_FAST40_MAX UINT40_MAX # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT40_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT40_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT40_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT40_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT40_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT40_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT40_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT40_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT40_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT40_WIDTH 40 # define INT40_WIDTH UINT40_WIDTH # define UINT_LEAST40_WIDTH UINT40_WIDTH # define INT_LEAST40_WIDTH UINT_LEAST40_WIDTH # define UINT_FAST40_WIDTH UINT40_WIDTH # define INT_FAST40_WIDTH UINT_FAST40_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT40_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT40_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT40_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT40_TYPE__ */ #ifdef __INT32_TYPE__ # define INT32_MAX INT32_C(2147483647) # define INT32_MIN (-INT32_C(2147483647)-1) # define UINT32_MAX UINT32_C(4294967295) # undef __INT_LEAST32_MIN # define __INT_LEAST32_MIN INT32_MIN # undef __INT_LEAST32_MAX # define __INT_LEAST32_MAX INT32_MAX # undef __UINT_LEAST32_MAX # define __UINT_LEAST32_MAX UINT32_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT32_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT32_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT32_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT32_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT32_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT32_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT32_WIDTH 32 # define INT32_WIDTH UINT32_WIDTH # undef __UINT_LEAST32_WIDTH # define __UINT_LEAST32_WIDTH UINT32_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT32_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT32_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT32_TYPE__ */ #ifdef __INT_LEAST32_MIN # define INT_LEAST32_MIN __INT_LEAST32_MIN # define INT_LEAST32_MAX __INT_LEAST32_MAX # define UINT_LEAST32_MAX __UINT_LEAST32_MAX # define INT_FAST32_MIN __INT_LEAST32_MIN # define INT_FAST32_MAX __INT_LEAST32_MAX # define UINT_FAST32_MAX __UINT_LEAST32_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT_LEAST32_WIDTH __UINT_LEAST32_WIDTH # define INT_LEAST32_WIDTH UINT_LEAST32_WIDTH # define UINT_FAST32_WIDTH __UINT_LEAST32_WIDTH # define INT_FAST32_WIDTH UINT_FAST32_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST32_MIN */ #ifdef __INT24_TYPE__ # define INT24_MAX INT24_C(8388607) # define INT24_MIN (-INT24_C(8388607)-1) # define UINT24_MAX UINT24_C(16777215) # define INT_LEAST24_MIN INT24_MIN # define INT_LEAST24_MAX INT24_MAX # define UINT_LEAST24_MAX UINT24_MAX # define INT_FAST24_MIN INT24_MIN # define INT_FAST24_MAX INT24_MAX # define UINT_FAST24_MAX UINT24_MAX # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT24_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT24_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT24_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT24_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT24_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT24_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've 
been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT24_WIDTH 24 # define INT24_WIDTH UINT24_WIDTH # define UINT_LEAST24_WIDTH UINT24_WIDTH # define INT_LEAST24_WIDTH UINT_LEAST24_WIDTH # define UINT_FAST24_WIDTH UINT24_WIDTH # define INT_FAST24_WIDTH UINT_FAST24_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT24_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT24_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT24_TYPE__ */ #ifdef __INT16_TYPE__ #define INT16_MAX INT16_C(32767) #define INT16_MIN (-INT16_C(32767)-1) #define UINT16_MAX UINT16_C(65535) # undef __INT_LEAST16_MIN # define __INT_LEAST16_MIN INT16_MIN # undef __INT_LEAST16_MAX # define __INT_LEAST16_MAX INT16_MAX # undef __UINT_LEAST16_MAX # define __UINT_LEAST16_MAX UINT16_MAX # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT16_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT16_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT16_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT16_WIDTH 16 # define INT16_WIDTH UINT16_WIDTH # undef __UINT_LEAST16_WIDTH # define __UINT_LEAST16_WIDTH UINT16_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT16_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT16_TYPE__ */ #ifdef __INT_LEAST16_MIN # define INT_LEAST16_MIN __INT_LEAST16_MIN # define INT_LEAST16_MAX __INT_LEAST16_MAX # define UINT_LEAST16_MAX __UINT_LEAST16_MAX # define INT_FAST16_MIN __INT_LEAST16_MIN # define INT_FAST16_MAX __INT_LEAST16_MAX # define UINT_FAST16_MAX __UINT_LEAST16_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT_LEAST16_WIDTH __UINT_LEAST16_WIDTH # define INT_LEAST16_WIDTH UINT_LEAST16_WIDTH # define UINT_FAST16_WIDTH __UINT_LEAST16_WIDTH # define INT_FAST16_WIDTH UINT_FAST16_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST16_MIN */ #ifdef __INT8_TYPE__ # define INT8_MAX INT8_C(127) # define INT8_MIN (-INT8_C(127)-1) # define UINT8_MAX UINT8_C(255) # undef __INT_LEAST8_MIN # define __INT_LEAST8_MIN INT8_MIN # undef __INT_LEAST8_MAX # define __INT_LEAST8_MAX INT8_MAX # undef __UINT_LEAST8_MAX # define __UINT_LEAST8_MAX UINT8_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT8_WIDTH 8 # define INT8_WIDTH UINT8_WIDTH # undef __UINT_LEAST8_WIDTH # define __UINT_LEAST8_WIDTH UINT8_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT8_TYPE__ */ #ifdef __INT_LEAST8_MIN # define INT_LEAST8_MIN __INT_LEAST8_MIN # define INT_LEAST8_MAX __INT_LEAST8_MAX # define UINT_LEAST8_MAX __UINT_LEAST8_MAX # define INT_FAST8_MIN __INT_LEAST8_MIN # define INT_FAST8_MAX __INT_LEAST8_MAX # define UINT_FAST8_MAX __UINT_LEAST8_MAX /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L # define UINT_LEAST8_WIDTH __UINT_LEAST8_WIDTH # define INT_LEAST8_WIDTH UINT_LEAST8_WIDTH # define UINT_FAST8_WIDTH __UINT_LEAST8_WIDTH # define INT_FAST8_WIDTH UINT_FAST8_WIDTH #endif /* __STDC_VERSION__ */ #endif /* __INT_LEAST8_MIN */ /* Some utility macros */ #define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) #define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) #define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) #define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) #define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) /* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ /* C99 7.18.3 Limits of other integer types. */ #define INTPTR_MIN (-__INTPTR_MAX__-1) #define INTPTR_MAX __INTPTR_MAX__ #define UINTPTR_MAX __UINTPTR_MAX__ #define PTRDIFF_MIN (-__PTRDIFF_MAX__-1) #define PTRDIFF_MAX __PTRDIFF_MAX__ #define SIZE_MAX __SIZE_MAX__ /* C2x 7.20.2.4 Width of integer types capable of holding object pointers. */ /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L /* NB: The C standard requires that these be the same value, but the compiler exposes separate internal width macros. */ #define INTPTR_WIDTH __INTPTR_WIDTH__ #define UINTPTR_WIDTH __UINTPTR_WIDTH__ #endif /* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__ * is enabled. */ #if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 #define RSIZE_MAX (SIZE_MAX >> 1) #endif /* C99 7.18.2.5 Limits of greatest-width integer types. */ #define INTMAX_MIN (-__INTMAX_MAX__-1) #define INTMAX_MAX __INTMAX_MAX__ #define UINTMAX_MAX __UINTMAX_MAX__ /* C2x 7.20.2.5 Width of greatest-width integer types. 
*/ /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L /* NB: The C standard requires that these be the same value, but the compiler exposes separate internal width macros. */ #define INTMAX_WIDTH __INTMAX_WIDTH__ #define UINTMAX_WIDTH __UINTMAX_WIDTH__ #endif /* C99 7.18.3 Limits of other integer types. */ #define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) #define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) #ifdef __WINT_UNSIGNED__ # define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0) # define WINT_MAX __UINTN_MAX(__WINT_WIDTH__) #else # define WINT_MIN __INTN_MIN(__WINT_WIDTH__) # define WINT_MAX __INTN_MAX(__WINT_WIDTH__) #endif #ifndef WCHAR_MAX # define WCHAR_MAX __WCHAR_MAX__ #endif #ifndef WCHAR_MIN # if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__) # define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) # else # define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0) # endif #endif /* 7.18.4.2 Macros for greatest-width integer constants. */ #define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__) #define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__) /* C2x 7.20.3.x Width of other integer types. */ /* FIXME: This is using the placeholder dates Clang produces for these macros in C2x mode; switch to the correct values once they've been published. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L #define PTRDIFF_WIDTH __PTRDIFF_WIDTH__ #define SIG_ATOMIC_WIDTH __SIG_ATOMIC_WIDTH__ #define SIZE_WIDTH __SIZE_WIDTH__ #define WCHAR_WIDTH __WCHAR_WIDTH__ #define WINT_WIDTH __WINT_WIDTH__ #endif #endif /* __STDC_HOSTED__ */ #endif /* __CLANG_STDINT_H2 */ /*===---- stdnoreturn.h - Standard header for noreturn macro ---------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __STDNORETURN_H #define __STDNORETURN_H #define noreturn _Noreturn #define __noreturn_is_defined 1 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \ !defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS) /* The noreturn macro is deprecated in C2x. We do not mark it as such because including the header file in C2x is also deprecated and we do not want to issue a confusing diagnostic for code which includes followed by code that writes [[noreturn]]. The issue with such code is not with the attribute, or the use of 'noreturn', but the inclusion of the header. */ /* FIXME: We should be issuing a deprecation warning here, but cannot yet due * to system headers which include this header file unconditionally. */ #endif #endif /* __STDNORETURN_H */ /*===---- tbmintrin.h - TBM intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __TBMINTRIN_H #define __TBMINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) #define __bextri_u32(a, b) \ ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \ (unsigned int)(b))) static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcfill_u32(unsigned int __a) { return __a & (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blci_u32(unsigned int __a) { return __a | ~(__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcic_u32(unsigned int __a) { return ~__a & (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcmsk_u32(unsigned int __a) { return __a ^ (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blcs_u32(unsigned int __a) { return __a | (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsfill_u32(unsigned int __a) { return __a | (__a - 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __blsic_u32(unsigned int __a) { return ~__a | (__a - 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __t1mskc_u32(unsigned int __a) { return ~__a | (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS __tzmsk_u32(unsigned int __a) { return ~__a & (__a - 1); } #ifdef __x86_64__ #define __bextri_u64(a, b) \ ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \ (unsigned long long)(b))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcfill_u64(unsigned long long __a) { return __a & (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blci_u64(unsigned long long __a) { return __a | ~(__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcic_u64(unsigned long long __a) { return ~__a & (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcmsk_u64(unsigned long long __a) { return __a ^ (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blcs_u64(unsigned long long __a) { return __a | (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS 
__blsfill_u64(unsigned long long __a) { return __a | (__a - 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __blsic_u64(unsigned long long __a) { return ~__a | (__a - 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __t1mskc_u64(unsigned long long __a) { return ~__a | (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS __tzmsk_u64(unsigned long long __a) { return ~__a & (__a - 1); } #endif #undef __DEFAULT_FN_ATTRS #endif /* __TBMINTRIN_H */ /*===---- tgmath.h - Standard header for type generic math ----------------===*\ * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_TGMATH_H #define __CLANG_TGMATH_H /* C99 7.22 Type-generic math . */ #include /* * Allow additional definitions and implementation-defined values on Apple * platforms. This is done after #include to avoid depcycle conflicts * between libcxx and darwin in C++ modules builds. */ #if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next() # include_next #else /* C++ handles type genericity with overloading in math.h. */ #ifndef __cplusplus #include #define _TG_ATTRSp __attribute__((__overloadable__)) #define _TG_ATTRS __attribute__((__overloadable__, __always_inline__)) // promotion typedef void _Argument_type_is_not_arithmetic; static _Argument_type_is_not_arithmetic __tg_promote(...) 
__attribute__((__unavailable__,__overloadable__)); static double _TG_ATTRSp __tg_promote(int); static double _TG_ATTRSp __tg_promote(unsigned int); static double _TG_ATTRSp __tg_promote(long); static double _TG_ATTRSp __tg_promote(unsigned long); static double _TG_ATTRSp __tg_promote(long long); static double _TG_ATTRSp __tg_promote(unsigned long long); static float _TG_ATTRSp __tg_promote(float); static double _TG_ATTRSp __tg_promote(double); static long double _TG_ATTRSp __tg_promote(long double); static float _Complex _TG_ATTRSp __tg_promote(float _Complex); static double _Complex _TG_ATTRSp __tg_promote(double _Complex); static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex); #define __tg_promote1(__x) (__typeof__(__tg_promote(__x))) #define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \ __tg_promote(__y))) #define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \ __tg_promote(__y) + \ __tg_promote(__z))) // acos static float _TG_ATTRS __tg_acos(float __x) {return acosf(__x);} static double _TG_ATTRS __tg_acos(double __x) {return acos(__x);} static long double _TG_ATTRS __tg_acos(long double __x) {return acosl(__x);} static float _Complex _TG_ATTRS __tg_acos(float _Complex __x) {return cacosf(__x);} static double _Complex _TG_ATTRS __tg_acos(double _Complex __x) {return cacos(__x);} static long double _Complex _TG_ATTRS __tg_acos(long double _Complex __x) {return cacosl(__x);} #undef acos #define acos(__x) __tg_acos(__tg_promote1((__x))(__x)) // asin static float _TG_ATTRS __tg_asin(float __x) {return asinf(__x);} static double _TG_ATTRS __tg_asin(double __x) {return asin(__x);} static long double _TG_ATTRS __tg_asin(long double __x) {return asinl(__x);} static float _Complex _TG_ATTRS __tg_asin(float _Complex __x) {return casinf(__x);} static double _Complex _TG_ATTRS __tg_asin(double _Complex __x) {return casin(__x);} static long double _Complex _TG_ATTRS __tg_asin(long double _Complex __x) {return casinl(__x);} 
#undef asin #define asin(__x) __tg_asin(__tg_promote1((__x))(__x)) // atan static float _TG_ATTRS __tg_atan(float __x) {return atanf(__x);} static double _TG_ATTRS __tg_atan(double __x) {return atan(__x);} static long double _TG_ATTRS __tg_atan(long double __x) {return atanl(__x);} static float _Complex _TG_ATTRS __tg_atan(float _Complex __x) {return catanf(__x);} static double _Complex _TG_ATTRS __tg_atan(double _Complex __x) {return catan(__x);} static long double _Complex _TG_ATTRS __tg_atan(long double _Complex __x) {return catanl(__x);} #undef atan #define atan(__x) __tg_atan(__tg_promote1((__x))(__x)) // acosh static float _TG_ATTRS __tg_acosh(float __x) {return acoshf(__x);} static double _TG_ATTRS __tg_acosh(double __x) {return acosh(__x);} static long double _TG_ATTRS __tg_acosh(long double __x) {return acoshl(__x);} static float _Complex _TG_ATTRS __tg_acosh(float _Complex __x) {return cacoshf(__x);} static double _Complex _TG_ATTRS __tg_acosh(double _Complex __x) {return cacosh(__x);} static long double _Complex _TG_ATTRS __tg_acosh(long double _Complex __x) {return cacoshl(__x);} #undef acosh #define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x)) // asinh static float _TG_ATTRS __tg_asinh(float __x) {return asinhf(__x);} static double _TG_ATTRS __tg_asinh(double __x) {return asinh(__x);} static long double _TG_ATTRS __tg_asinh(long double __x) {return asinhl(__x);} static float _Complex _TG_ATTRS __tg_asinh(float _Complex __x) {return casinhf(__x);} static double _Complex _TG_ATTRS __tg_asinh(double _Complex __x) {return casinh(__x);} static long double _Complex _TG_ATTRS __tg_asinh(long double _Complex __x) {return casinhl(__x);} #undef asinh #define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x)) // atanh static float _TG_ATTRS __tg_atanh(float __x) {return atanhf(__x);} static double _TG_ATTRS __tg_atanh(double __x) {return atanh(__x);} static long double _TG_ATTRS __tg_atanh(long double __x) {return atanhl(__x);} static float _Complex _TG_ATTRS 
__tg_atanh(float _Complex __x) {return catanhf(__x);} static double _Complex _TG_ATTRS __tg_atanh(double _Complex __x) {return catanh(__x);} static long double _Complex _TG_ATTRS __tg_atanh(long double _Complex __x) {return catanhl(__x);} #undef atanh #define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x)) // cos static float _TG_ATTRS __tg_cos(float __x) {return cosf(__x);} static double _TG_ATTRS __tg_cos(double __x) {return cos(__x);} static long double _TG_ATTRS __tg_cos(long double __x) {return cosl(__x);} static float _Complex _TG_ATTRS __tg_cos(float _Complex __x) {return ccosf(__x);} static double _Complex _TG_ATTRS __tg_cos(double _Complex __x) {return ccos(__x);} static long double _Complex _TG_ATTRS __tg_cos(long double _Complex __x) {return ccosl(__x);} #undef cos #define cos(__x) __tg_cos(__tg_promote1((__x))(__x)) // sin static float _TG_ATTRS __tg_sin(float __x) {return sinf(__x);} static double _TG_ATTRS __tg_sin(double __x) {return sin(__x);} static long double _TG_ATTRS __tg_sin(long double __x) {return sinl(__x);} static float _Complex _TG_ATTRS __tg_sin(float _Complex __x) {return csinf(__x);} static double _Complex _TG_ATTRS __tg_sin(double _Complex __x) {return csin(__x);} static long double _Complex _TG_ATTRS __tg_sin(long double _Complex __x) {return csinl(__x);} #undef sin #define sin(__x) __tg_sin(__tg_promote1((__x))(__x)) // tan static float _TG_ATTRS __tg_tan(float __x) {return tanf(__x);} static double _TG_ATTRS __tg_tan(double __x) {return tan(__x);} static long double _TG_ATTRS __tg_tan(long double __x) {return tanl(__x);} static float _Complex _TG_ATTRS __tg_tan(float _Complex __x) {return ctanf(__x);} static double _Complex _TG_ATTRS __tg_tan(double _Complex __x) {return ctan(__x);} static long double _Complex _TG_ATTRS __tg_tan(long double _Complex __x) {return ctanl(__x);} #undef tan #define tan(__x) __tg_tan(__tg_promote1((__x))(__x)) // cosh static float _TG_ATTRS __tg_cosh(float __x) {return coshf(__x);} static double 
_TG_ATTRS __tg_cosh(double __x) {return cosh(__x);} static long double _TG_ATTRS __tg_cosh(long double __x) {return coshl(__x);} static float _Complex _TG_ATTRS __tg_cosh(float _Complex __x) {return ccoshf(__x);} static double _Complex _TG_ATTRS __tg_cosh(double _Complex __x) {return ccosh(__x);} static long double _Complex _TG_ATTRS __tg_cosh(long double _Complex __x) {return ccoshl(__x);} #undef cosh #define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x)) // sinh static float _TG_ATTRS __tg_sinh(float __x) {return sinhf(__x);} static double _TG_ATTRS __tg_sinh(double __x) {return sinh(__x);} static long double _TG_ATTRS __tg_sinh(long double __x) {return sinhl(__x);} static float _Complex _TG_ATTRS __tg_sinh(float _Complex __x) {return csinhf(__x);} static double _Complex _TG_ATTRS __tg_sinh(double _Complex __x) {return csinh(__x);} static long double _Complex _TG_ATTRS __tg_sinh(long double _Complex __x) {return csinhl(__x);} #undef sinh #define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x)) // tanh static float _TG_ATTRS __tg_tanh(float __x) {return tanhf(__x);} static double _TG_ATTRS __tg_tanh(double __x) {return tanh(__x);} static long double _TG_ATTRS __tg_tanh(long double __x) {return tanhl(__x);} static float _Complex _TG_ATTRS __tg_tanh(float _Complex __x) {return ctanhf(__x);} static double _Complex _TG_ATTRS __tg_tanh(double _Complex __x) {return ctanh(__x);} static long double _Complex _TG_ATTRS __tg_tanh(long double _Complex __x) {return ctanhl(__x);} #undef tanh #define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x)) // exp static float _TG_ATTRS __tg_exp(float __x) {return expf(__x);} static double _TG_ATTRS __tg_exp(double __x) {return exp(__x);} static long double _TG_ATTRS __tg_exp(long double __x) {return expl(__x);} static float _Complex _TG_ATTRS __tg_exp(float _Complex __x) {return cexpf(__x);} static double _Complex _TG_ATTRS __tg_exp(double _Complex __x) {return cexp(__x);} static long double _Complex _TG_ATTRS __tg_exp(long double _Complex 
__x) {return cexpl(__x);} #undef exp #define exp(__x) __tg_exp(__tg_promote1((__x))(__x)) // log static float _TG_ATTRS __tg_log(float __x) {return logf(__x);} static double _TG_ATTRS __tg_log(double __x) {return log(__x);} static long double _TG_ATTRS __tg_log(long double __x) {return logl(__x);} static float _Complex _TG_ATTRS __tg_log(float _Complex __x) {return clogf(__x);} static double _Complex _TG_ATTRS __tg_log(double _Complex __x) {return clog(__x);} static long double _Complex _TG_ATTRS __tg_log(long double _Complex __x) {return clogl(__x);} #undef log #define log(__x) __tg_log(__tg_promote1((__x))(__x)) // pow static float _TG_ATTRS __tg_pow(float __x, float __y) {return powf(__x, __y);} static double _TG_ATTRS __tg_pow(double __x, double __y) {return pow(__x, __y);} static long double _TG_ATTRS __tg_pow(long double __x, long double __y) {return powl(__x, __y);} static float _Complex _TG_ATTRS __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);} static double _Complex _TG_ATTRS __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);} static long double _Complex _TG_ATTRS __tg_pow(long double _Complex __x, long double _Complex __y) {return cpowl(__x, __y);} #undef pow #define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // sqrt static float _TG_ATTRS __tg_sqrt(float __x) {return sqrtf(__x);} static double _TG_ATTRS __tg_sqrt(double __x) {return sqrt(__x);} static long double _TG_ATTRS __tg_sqrt(long double __x) {return sqrtl(__x);} static float _Complex _TG_ATTRS __tg_sqrt(float _Complex __x) {return csqrtf(__x);} static double _Complex _TG_ATTRS __tg_sqrt(double _Complex __x) {return csqrt(__x);} static long double _Complex _TG_ATTRS __tg_sqrt(long double _Complex __x) {return csqrtl(__x);} #undef sqrt #define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x)) // fabs static float _TG_ATTRS __tg_fabs(float __x) {return fabsf(__x);} static double _TG_ATTRS 
__tg_fabs(double __x) {return fabs(__x);} static long double _TG_ATTRS __tg_fabs(long double __x) {return fabsl(__x);} static float _TG_ATTRS __tg_fabs(float _Complex __x) {return cabsf(__x);} static double _TG_ATTRS __tg_fabs(double _Complex __x) {return cabs(__x);} static long double _TG_ATTRS __tg_fabs(long double _Complex __x) {return cabsl(__x);} #undef fabs #define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x)) // atan2 static float _TG_ATTRS __tg_atan2(float __x, float __y) {return atan2f(__x, __y);} static double _TG_ATTRS __tg_atan2(double __x, double __y) {return atan2(__x, __y);} static long double _TG_ATTRS __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);} #undef atan2 #define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // cbrt static float _TG_ATTRS __tg_cbrt(float __x) {return cbrtf(__x);} static double _TG_ATTRS __tg_cbrt(double __x) {return cbrt(__x);} static long double _TG_ATTRS __tg_cbrt(long double __x) {return cbrtl(__x);} #undef cbrt #define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x)) // ceil static float _TG_ATTRS __tg_ceil(float __x) {return ceilf(__x);} static double _TG_ATTRS __tg_ceil(double __x) {return ceil(__x);} static long double _TG_ATTRS __tg_ceil(long double __x) {return ceill(__x);} #undef ceil #define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x)) // copysign static float _TG_ATTRS __tg_copysign(float __x, float __y) {return copysignf(__x, __y);} static double _TG_ATTRS __tg_copysign(double __x, double __y) {return copysign(__x, __y);} static long double _TG_ATTRS __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);} #undef copysign #define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // erf static float _TG_ATTRS __tg_erf(float __x) {return erff(__x);} static double _TG_ATTRS __tg_erf(double __x) {return erf(__x);} static long double _TG_ATTRS __tg_erf(long double __x) 
{return erfl(__x);} #undef erf #define erf(__x) __tg_erf(__tg_promote1((__x))(__x)) // erfc static float _TG_ATTRS __tg_erfc(float __x) {return erfcf(__x);} static double _TG_ATTRS __tg_erfc(double __x) {return erfc(__x);} static long double _TG_ATTRS __tg_erfc(long double __x) {return erfcl(__x);} #undef erfc #define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x)) // exp2 static float _TG_ATTRS __tg_exp2(float __x) {return exp2f(__x);} static double _TG_ATTRS __tg_exp2(double __x) {return exp2(__x);} static long double _TG_ATTRS __tg_exp2(long double __x) {return exp2l(__x);} #undef exp2 #define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x)) // expm1 static float _TG_ATTRS __tg_expm1(float __x) {return expm1f(__x);} static double _TG_ATTRS __tg_expm1(double __x) {return expm1(__x);} static long double _TG_ATTRS __tg_expm1(long double __x) {return expm1l(__x);} #undef expm1 #define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x)) // fdim static float _TG_ATTRS __tg_fdim(float __x, float __y) {return fdimf(__x, __y);} static double _TG_ATTRS __tg_fdim(double __x, double __y) {return fdim(__x, __y);} static long double _TG_ATTRS __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);} #undef fdim #define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // floor static float _TG_ATTRS __tg_floor(float __x) {return floorf(__x);} static double _TG_ATTRS __tg_floor(double __x) {return floor(__x);} static long double _TG_ATTRS __tg_floor(long double __x) {return floorl(__x);} #undef floor #define floor(__x) __tg_floor(__tg_promote1((__x))(__x)) // fma static float _TG_ATTRS __tg_fma(float __x, float __y, float __z) {return fmaf(__x, __y, __z);} static double _TG_ATTRS __tg_fma(double __x, double __y, double __z) {return fma(__x, __y, __z);} static long double _TG_ATTRS __tg_fma(long double __x,long double __y, long double __z) {return fmal(__x, __y, __z);} #undef fma #define fma(__x, __y, __z) \ 
__tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \ __tg_promote3((__x), (__y), (__z))(__y), \ __tg_promote3((__x), (__y), (__z))(__z)) // fmax static float _TG_ATTRS __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);} static double _TG_ATTRS __tg_fmax(double __x, double __y) {return fmax(__x, __y);} static long double _TG_ATTRS __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);} #undef fmax #define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // fmin static float _TG_ATTRS __tg_fmin(float __x, float __y) {return fminf(__x, __y);} static double _TG_ATTRS __tg_fmin(double __x, double __y) {return fmin(__x, __y);} static long double _TG_ATTRS __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);} #undef fmin #define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // fmod static float _TG_ATTRS __tg_fmod(float __x, float __y) {return fmodf(__x, __y);} static double _TG_ATTRS __tg_fmod(double __x, double __y) {return fmod(__x, __y);} static long double _TG_ATTRS __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);} #undef fmod #define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // frexp static float _TG_ATTRS __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);} static double _TG_ATTRS __tg_frexp(double __x, int* __y) {return frexp(__x, __y);} static long double _TG_ATTRS __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);} #undef frexp #define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y) // hypot static float _TG_ATTRS __tg_hypot(float __x, float __y) {return hypotf(__x, __y);} static double _TG_ATTRS __tg_hypot(double __x, double __y) {return hypot(__x, __y);} static long double _TG_ATTRS __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);} #undef hypot #define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), 
(__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // ilogb static int _TG_ATTRS __tg_ilogb(float __x) {return ilogbf(__x);} static int _TG_ATTRS __tg_ilogb(double __x) {return ilogb(__x);} static int _TG_ATTRS __tg_ilogb(long double __x) {return ilogbl(__x);} #undef ilogb #define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x)) // ldexp static float _TG_ATTRS __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);} static double _TG_ATTRS __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);} static long double _TG_ATTRS __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);} #undef ldexp #define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y) // lgamma static float _TG_ATTRS __tg_lgamma(float __x) {return lgammaf(__x);} static double _TG_ATTRS __tg_lgamma(double __x) {return lgamma(__x);} static long double _TG_ATTRS __tg_lgamma(long double __x) {return lgammal(__x);} #undef lgamma #define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x)) // llrint static long long _TG_ATTRS __tg_llrint(float __x) {return llrintf(__x);} static long long _TG_ATTRS __tg_llrint(double __x) {return llrint(__x);} static long long _TG_ATTRS __tg_llrint(long double __x) {return llrintl(__x);} #undef llrint #define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x)) // llround static long long _TG_ATTRS __tg_llround(float __x) {return llroundf(__x);} static long long _TG_ATTRS __tg_llround(double __x) {return llround(__x);} static long long _TG_ATTRS __tg_llround(long double __x) {return llroundl(__x);} #undef llround #define llround(__x) __tg_llround(__tg_promote1((__x))(__x)) // log10 static float _TG_ATTRS __tg_log10(float __x) {return log10f(__x);} static double _TG_ATTRS __tg_log10(double __x) {return log10(__x);} static long double _TG_ATTRS __tg_log10(long double __x) {return log10l(__x);} #undef log10 #define log10(__x) __tg_log10(__tg_promote1((__x))(__x)) // log1p static float _TG_ATTRS __tg_log1p(float __x) {return log1pf(__x);} static double _TG_ATTRS 
__tg_log1p(double __x) {return log1p(__x);} static long double _TG_ATTRS __tg_log1p(long double __x) {return log1pl(__x);} #undef log1p #define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x)) // log2 static float _TG_ATTRS __tg_log2(float __x) {return log2f(__x);} static double _TG_ATTRS __tg_log2(double __x) {return log2(__x);} static long double _TG_ATTRS __tg_log2(long double __x) {return log2l(__x);} #undef log2 #define log2(__x) __tg_log2(__tg_promote1((__x))(__x)) // logb static float _TG_ATTRS __tg_logb(float __x) {return logbf(__x);} static double _TG_ATTRS __tg_logb(double __x) {return logb(__x);} static long double _TG_ATTRS __tg_logb(long double __x) {return logbl(__x);} #undef logb #define logb(__x) __tg_logb(__tg_promote1((__x))(__x)) // lrint static long _TG_ATTRS __tg_lrint(float __x) {return lrintf(__x);} static long _TG_ATTRS __tg_lrint(double __x) {return lrint(__x);} static long _TG_ATTRS __tg_lrint(long double __x) {return lrintl(__x);} #undef lrint #define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x)) // lround static long _TG_ATTRS __tg_lround(float __x) {return lroundf(__x);} static long _TG_ATTRS __tg_lround(double __x) {return lround(__x);} static long _TG_ATTRS __tg_lround(long double __x) {return lroundl(__x);} #undef lround #define lround(__x) __tg_lround(__tg_promote1((__x))(__x)) // nearbyint static float _TG_ATTRS __tg_nearbyint(float __x) {return nearbyintf(__x);} static double _TG_ATTRS __tg_nearbyint(double __x) {return nearbyint(__x);} static long double _TG_ATTRS __tg_nearbyint(long double __x) {return nearbyintl(__x);} #undef nearbyint #define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x)) // nextafter static float _TG_ATTRS __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);} static double _TG_ATTRS __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);} static long double _TG_ATTRS __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);} #undef nextafter #define 
nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // nexttoward static float _TG_ATTRS __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);} static double _TG_ATTRS __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);} static long double _TG_ATTRS __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);} #undef nexttoward #define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y)) // remainder static float _TG_ATTRS __tg_remainder(float __x, float __y) {return remainderf(__x, __y);} static double _TG_ATTRS __tg_remainder(double __x, double __y) {return remainder(__x, __y);} static long double _TG_ATTRS __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);} #undef remainder #define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y)) // remquo static float _TG_ATTRS __tg_remquo(float __x, float __y, int* __z) {return remquof(__x, __y, __z);} static double _TG_ATTRS __tg_remquo(double __x, double __y, int* __z) {return remquo(__x, __y, __z);} static long double _TG_ATTRS __tg_remquo(long double __x,long double __y, int* __z) {return remquol(__x, __y, __z);} #undef remquo #define remquo(__x, __y, __z) \ __tg_remquo(__tg_promote2((__x), (__y))(__x), \ __tg_promote2((__x), (__y))(__y), \ (__z)) // rint static float _TG_ATTRS __tg_rint(float __x) {return rintf(__x);} static double _TG_ATTRS __tg_rint(double __x) {return rint(__x);} static long double _TG_ATTRS __tg_rint(long double __x) {return rintl(__x);} #undef rint #define rint(__x) __tg_rint(__tg_promote1((__x))(__x)) // round static float _TG_ATTRS __tg_round(float __x) {return roundf(__x);} static double _TG_ATTRS __tg_round(double __x) {return round(__x);} static long double _TG_ATTRS __tg_round(long double __x) {return roundl(__x);} #undef round #define round(__x) 
__tg_round(__tg_promote1((__x))(__x)) // scalbn static float _TG_ATTRS __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);} static double _TG_ATTRS __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);} static long double _TG_ATTRS __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);} #undef scalbn #define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y) // scalbln static float _TG_ATTRS __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);} static double _TG_ATTRS __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);} static long double _TG_ATTRS __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);} #undef scalbln #define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y) // tgamma static float _TG_ATTRS __tg_tgamma(float __x) {return tgammaf(__x);} static double _TG_ATTRS __tg_tgamma(double __x) {return tgamma(__x);} static long double _TG_ATTRS __tg_tgamma(long double __x) {return tgammal(__x);} #undef tgamma #define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x)) // trunc static float _TG_ATTRS __tg_trunc(float __x) {return truncf(__x);} static double _TG_ATTRS __tg_trunc(double __x) {return trunc(__x);} static long double _TG_ATTRS __tg_trunc(long double __x) {return truncl(__x);} #undef trunc #define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x)) // carg static float _TG_ATTRS __tg_carg(float __x) {return atan2f(0.F, __x);} static double _TG_ATTRS __tg_carg(double __x) {return atan2(0., __x);} static long double _TG_ATTRS __tg_carg(long double __x) {return atan2l(0.L, __x);} static float _TG_ATTRS __tg_carg(float _Complex __x) {return cargf(__x);} static double _TG_ATTRS __tg_carg(double _Complex __x) {return carg(__x);} static long double _TG_ATTRS __tg_carg(long double _Complex __x) {return cargl(__x);} #undef carg #define carg(__x) __tg_carg(__tg_promote1((__x))(__x)) // cimag static float _TG_ATTRS __tg_cimag(float __x) {return 0;} static double _TG_ATTRS __tg_cimag(double 
__x) {return 0;} static long double _TG_ATTRS __tg_cimag(long double __x) {return 0;} static float _TG_ATTRS __tg_cimag(float _Complex __x) {return cimagf(__x);} static double _TG_ATTRS __tg_cimag(double _Complex __x) {return cimag(__x);} static long double _TG_ATTRS __tg_cimag(long double _Complex __x) {return cimagl(__x);} #undef cimag #define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x)) // conj static float _Complex _TG_ATTRS __tg_conj(float __x) {return __x;} static double _Complex _TG_ATTRS __tg_conj(double __x) {return __x;} static long double _Complex _TG_ATTRS __tg_conj(long double __x) {return __x;} static float _Complex _TG_ATTRS __tg_conj(float _Complex __x) {return conjf(__x);} static double _Complex _TG_ATTRS __tg_conj(double _Complex __x) {return conj(__x);} static long double _Complex _TG_ATTRS __tg_conj(long double _Complex __x) {return conjl(__x);} #undef conj #define conj(__x) __tg_conj(__tg_promote1((__x))(__x)) // cproj static float _Complex _TG_ATTRS __tg_cproj(float __x) {return cprojf(__x);} static double _Complex _TG_ATTRS __tg_cproj(double __x) {return cproj(__x);} static long double _Complex _TG_ATTRS __tg_cproj(long double __x) {return cprojl(__x);} static float _Complex _TG_ATTRS __tg_cproj(float _Complex __x) {return cprojf(__x);} static double _Complex _TG_ATTRS __tg_cproj(double _Complex __x) {return cproj(__x);} static long double _Complex _TG_ATTRS __tg_cproj(long double _Complex __x) {return cprojl(__x);} #undef cproj #define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x)) // creal static float _TG_ATTRS __tg_creal(float __x) {return __x;} static double _TG_ATTRS __tg_creal(double __x) {return __x;} static long double _TG_ATTRS __tg_creal(long double __x) {return __x;} static float _TG_ATTRS __tg_creal(float _Complex __x) {return crealf(__x);} static double _TG_ATTRS __tg_creal(double _Complex __x) {return creal(__x);} static long double _TG_ATTRS __tg_creal(long double _Complex __x) {return creall(__x);} #undef creal #define 
creal(__x) __tg_creal(__tg_promote1((__x))(__x)) #undef _TG_ATTRSp #undef _TG_ATTRS #endif /* __cplusplus */ #endif /* __has_include_next */ #endif /* __CLANG_TGMATH_H */ /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __TMMINTRIN_H #define __TMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PABSB instruction. /// /// \param __a /// A 64-bit vector of [8 x i8]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a) { return (__m64)__builtin_ia32_pabsb((__v8qi)__a); } /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSB instruction. /// /// \param __a /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { return (__m128i)__builtin_elementwise_abs((__v16qs)__a); } /// Computes the absolute value of each of the packed 16-bit signed /// integers in the source operand and stores the 16-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PABSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a) { return (__m64)__builtin_ia32_pabsw((__v4hi)__a); } /// Computes the absolute value of each of the packed 16-bit signed /// integers in the source operand and stores the 16-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { return (__m128i)__builtin_elementwise_abs((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed /// integers in the source operand and stores the 32-bit unsigned integer /// results in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PABSD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a) { return (__m64)__builtin_ia32_pabsd((__v2si)__a); } /// Computes the absolute value of each of the packed 32-bit signed /// integers in the source operand and stores the 32-bit unsigned integer /// results in the destination. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c VPABSD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit integer vector containing the absolute values of the /// elements in the operand. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { return (__m128i)__builtin_elementwise_abs((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and /// right-shifts the result by the number of bytes specified in the immediate /// operand. /// /// \headerfile /// /// \code /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); /// \endcode /// /// This intrinsic corresponds to the \c PALIGNR instruction. /// /// \param a /// A 128-bit vector of [16 x i8] containing one of the source operands. /// \param b /// A 128-bit vector of [16 x i8] containing one of the source operands. /// \param n /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 128-bit integer vector containing the concatenated right-shifted /// value. #define _mm_alignr_epi8(a, b, n) \ ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ (__v16qi)(__m128i)(b), (n))) /// Concatenates the two 64-bit integer vector operands, and right-shifts /// the result by the number of bytes specified in the immediate operand. /// /// \headerfile /// /// \code /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n); /// \endcode /// /// This intrinsic corresponds to the \c PALIGNR instruction. /// /// \param a /// A 64-bit vector of [8 x i8] containing one of the source operands. /// \param b /// A 64-bit vector of [8 x i8] containing one of the source operands. /// \param n /// An immediate operand specifying how many bytes to right-shift the result. /// \returns A 64-bit integer vector containing the concatenated right-shifted /// value. 
#define _mm_alignr_pi8(a, b, n) \ ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHADDW instruction. 
/// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHADDD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to /// 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHADDSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. 
The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to /// 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHADDSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal sums of the values are stored in the upper bits of the /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [8 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. 
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [4 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBD instruction. /// /// \param __a /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [4 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [4 x i16]. /// /// \headerfile /// /// This intrinsic corresponds to the \c PHSUBW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [2 x i32]. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c PHSUBD instruction. /// /// \param __a /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 64-bit vector of [2 x i32] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are /// saturated to 0x8000. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPHSUBSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are /// saturated to 0x8000. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c PHSUBSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the lower bits of /// the destination. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. The /// horizontal differences between the values are stored in the upper bits of /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. /// /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of /// both operands are multiplied, and the sum of both results is written to /// bits [15:0] of the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMADDUBSW instruction. /// /// \param __a /// A 128-bit integer vector containing the first source operand. /// \param __b /// A 128-bit integer vector containing the second source operand. 
/// \returns A 128-bit integer vector containing the sums of products of both /// operands: \n /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. /// /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of /// both operands are multiplied, and the sum of both results is written to /// bits [15:0] of the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PMADDUBSW instruction. /// /// \param __a /// A 64-bit integer vector containing the first source operand. /// \param __b /// A 64-bit integer vector containing the second source operand. 
/// \returns A 64-bit integer vector containing the sums of products of both /// operands: \n /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPMULHRSW instruction. /// /// \param __a /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \param __b /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PMULHRSW instruction. /// /// \param __a /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \param __b /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); } /// Copies the 8-bit integers from a 128-bit integer vector to the /// destination or clears 8-bit values in the destination, as specified by /// the second source operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSHUFB instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination: /// Bit 7: \n /// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the /// destination. \n /// Bits [6:4] Reserved. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 128-bit integer vector containing the copied or cleared values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } /// Copies the 8-bit integers from a 64-bit integer vector to the /// destination or clears 8-bit values in the destination, as specified by /// the second source operand. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSHUFB instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination: /// Bit 7: \n /// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the /// destination. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); } /// For each 8-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the byte in the second source is negative, calculate the two's /// complement of the corresponding byte in the first source, and write that /// value to the destination. If the byte in the second source is positive, /// copy the corresponding byte from the first source to the destination. If /// the byte in the second source is zero, clear the corresponding byte in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNB instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } /// For each 16-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the word in the second source is negative, calculate the two's /// complement of the corresponding word in the first source, and write that /// value to the destination. If the word in the second source is positive, /// copy the corresponding word from the first source to the destination. If /// the word in the second source is zero, clear the corresponding word in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGNW instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. 
/// \param __b /// A 128-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } /// For each 32-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the doubleword in the second source is negative, calculate the two's /// complement of the corresponding word in the first source, and write that /// value to the destination. If the doubleword in the second source is /// positive, copy the corresponding word from the first source to the /// destination. If the doubleword in the second source is zero, clear the /// corresponding word in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c VPSIGND instruction. /// /// \param __a /// A 128-bit integer vector containing the values to be copied. /// \param __b /// A 128-bit integer vector containing control doublewords corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } /// For each 8-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the byte in the second source is negative, calculate the two's /// complement of the corresponding byte in the first source, and write that /// value to the destination. If the byte in the second source is positive, /// copy the corresponding byte from the first source to the destination. If /// the byte in the second source is zero, clear the corresponding byte in /// the destination. 
/// /// \headerfile /// /// This intrinsic corresponds to the \c PSIGNB instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); } /// For each 16-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the word in the second source is negative, calculate the two's /// complement of the corresponding word in the first source, and write that /// value to the destination. If the word in the second source is positive, /// copy the corresponding word from the first source to the destination. If /// the word in the second source is zero, clear the corresponding word in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSIGNW instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); } /// For each 32-bit integer in the first source operand, perform one of /// the following actions as specified by the second source operand. /// /// If the doubleword in the second source is negative, calculate the two's /// complement of the corresponding doubleword in the first source, and /// write that value to the destination. 
If the doubleword in the second /// source is positive, copy the corresponding doubleword from the first /// source to the destination. If the doubleword in the second source is /// zero, clear the corresponding doubleword in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the \c PSIGND instruction. /// /// \param __a /// A 64-bit integer vector containing the values to be copied. /// \param __b /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_MMX #endif /* __TMMINTRIN_H */ /*===------------- tsxldtrkintrin.h - tsxldtrk intrinsics ------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __TSXLDTRKINTRIN_H #define __TSXLDTRKINTRIN_H /* Define the default attributes for the functions in this file */ #define _DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("tsxldtrk"))) /// Marks the start of an TSX (RTM) suspend load address tracking region. If /// this intrinsic is used inside a transactional region, subsequent loads /// are not added to the read set of the transaction. If it's used inside a /// suspend load address tracking region it will cause transaction abort. /// If it's used outside of a transactional region it behaves like a NOP. /// /// \headerfile /// /// This intrinsic corresponds to the \c XSUSLDTRK instruction. 
/// static __inline__ void _DEFAULT_FN_ATTRS _xsusldtrk (void) { __builtin_ia32_xsusldtrk(); } /// Marks the end of an TSX (RTM) suspend load address tracking region. If this /// intrinsic is used inside a suspend load address tracking region it will /// end the suspend region and all following load addresses will be added to /// the transaction read set. If it's used inside an active transaction but /// not in a suspend region it will cause transaction abort. If it's used /// outside of a transactional region it behaves like a NOP. /// /// \headerfile /// /// This intrinsic corresponds to the \c XRESLDTRK instruction. /// static __inline__ void _DEFAULT_FN_ATTRS _xresldtrk (void) { __builtin_ia32_xresldtrk(); } #undef _DEFAULT_FN_ATTRS #endif /* __TSXLDTRKINTRIN_H */ /*===------------------ uintrintrin.h - UINTR intrinsics -------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #error "Never use directly; include instead." #endif #ifndef __UINTRINTRIN_H #define __UINTRINTRIN_H /* Define the default attributes for the functions in this file */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("uintr"))) #ifdef __x86_64__ struct __uintr_frame { unsigned long long rip; unsigned long long rflags; unsigned long long rsp; }; /// Clears the user interrupt flag (UIF). Its effect takes place immediately: a /// user interrupt cannot be delivered on the instruction boundary following /// CLUI. Can be executed only if CR4.UINT = 1, the logical processor is in /// 64-bit mode, and software is not executing inside an enclave; otherwise, /// each causes an invalid-opcode exception. 
Causes a transactional abort if /// executed inside a transactional region; the abort loads EAX as it would /// had it been due to an execution of CLI. /// /// \headerfile /// /// This intrinsic corresponds to the CLUI instruction. /// /// \code{.operation} /// UIF := 0 /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _clui (void) { __builtin_ia32_clui(); } /// Sets the user interrupt flag (UIF). Its effect takes place immediately; a /// user interrupt may be delivered on the instruction boundary following /// STUI. Can be executed only if CR4.UINT = 1, the logical processor is in /// 64-bit mode, and software is not executing inside an enclave; otherwise, /// each causes an invalid-opcode exception. Causes a transactional abort if /// executed inside a transactional region; the abort loads EAX as it would /// had it been due to an execution of STI. /// /// \headerfile /// /// This intrinsic corresponds to the STUI instruction. /// /// \code{.operation} /// UIF := 1 /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _stui (void) { __builtin_ia32_stui(); } /// Get the current value of the user interrupt flag (UIF). Can be executed /// regardless of CPL and inside a transactional region. Can be executed only /// if CR4.UINT = 1, the logical processor is in 64-bit mode, and software is /// not executing inside an enclave; otherwise, it causes an invalid-opcode /// exception. /// /// \headerfile /// /// This intrinsic corresponds to the TESTUI instruction. /// /// \returns The current value of the user interrupt flag (UIF). /// /// \code{.operation} /// CF := UIF /// ZF := 0 /// AF := 0 /// OF := 0 /// PF := 0 /// SF := 0 /// dst := CF /// \endcode static __inline__ unsigned char __DEFAULT_FN_ATTRS _testui (void) { return __builtin_ia32_testui(); } /// Send interprocessor user interrupt. 
Can be executed only if /// CR4.UINT = IA32_UINT_TT[0] = 1, the logical processor is in 64-bit mode, /// and software is not executing inside an enclave; otherwise, it causes an /// invalid-opcode exception. May be executed at any privilege level, all of /// its memory accesses are performed with supervisor privilege. /// /// \headerfile /// /// This intrinsic corresponds to the SENDUIPI instruction /// /// \param __a /// Index of user-interrupt target table entry in user-interrupt target /// table. /// /// \code{.operation} /// IF __a > UITTSZ /// GP (0) /// FI /// tempUITTE := MEM[UITTADDR + (a<<4)] /// // tempUITTE must be valid, and can't have any reserved bit set /// IF (tempUITTE.V == 0 OR tempUITTE[7:1] != 0) /// GP (0) /// FI /// tempUPID := MEM[tempUITTE.UPIDADDR] // under lock /// // tempUPID can't have any reserved bit set /// IF (tempUPID[15:2] != 0 OR tempUPID[31:24] != 0) /// GP (0) // release lock /// FI /// tempUPID.PIR[tempUITTE.UV] := 1; /// IF (tempUPID.SN == 0 AND tempUPID.ON == 0) /// tempUPID.ON := 1 /// sendNotify := 1 /// ELSE /// sendNotify := 0 /// FI /// MEM[tempUITTE.UPIDADDR] := tempUPID // release lock /// IF sendNotify == 1 /// IF IA32_APIC_BASE[10] == 1 // local APIC is in x2APIC mode /// // send ordinary IPI with vector tempUPID.NV to 32-bit physical APIC /// // ID tempUPID.NDST /// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST) /// ELSE /// // send ordinary IPI with vector tempUPID.NV to 8-bit physical APIC /// // ID tempUPID.NDST[15:8] /// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST[15:8]) /// FI /// FI /// \endcode static __inline__ void __DEFAULT_FN_ATTRS _senduipi (unsigned long long __a) { __builtin_ia32_senduipi(__a); } #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS #endif /* __UINTRINTRIN_H */ /*===---- unwind.h - Stack unwinding ----------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/ #if defined(__APPLE__) && __has_include_next() /* Darwin (from 11.x on) provide an unwind.h. If that's available, * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * so define that around the include.*/ # ifndef _GNU_SOURCE # define _SHOULD_UNDEFINE_GNU_SOURCE # define _GNU_SOURCE # endif // libunwind's unwind.h reflects the current visibility. However, Mozilla // builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the // visibility to default and export its contents. gcc also allows users to // override its override by #defining HIDE_EXPORTS (but note, this only obeys // the user's -fvisibility setting; it doesn't hide any exports on its own). We // imitate gcc's header here: # ifdef HIDE_EXPORTS # include_next # else # pragma GCC visibility push(default) # include_next # pragma GCC visibility pop # endif # ifdef _SHOULD_UNDEFINE_GNU_SOURCE # undef _GNU_SOURCE # undef _SHOULD_UNDEFINE_GNU_SOURCE # endif #else #ifndef __CLANG_UNWIND_H #define __CLANG_UNWIND_H #include #ifdef __cplusplus extern "C" { #endif /* It is a bit strange for a header to play with the visibility of the symbols it declares, but this matches gcc's behavior and some programs depend on it */ #ifndef HIDE_EXPORTS #pragma GCC visibility push(default) #endif typedef uintptr_t _Unwind_Word __attribute__((__mode__(__unwind_word__))); typedef intptr_t _Unwind_Sword __attribute__((__mode__(__unwind_word__))); typedef uintptr_t _Unwind_Ptr; typedef uintptr_t _Unwind_Internal_Ptr; typedef uint64_t _Unwind_Exception_Class; typedef intptr_t _sleb128_t; typedef uintptr_t _uleb128_t; struct _Unwind_Context; #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || \ defined(__ARM_DWARF_EH__) || defined(__SEH__)) struct _Unwind_Control_Block; typedef 
struct _Unwind_Control_Block _Unwind_Control_Block; #define _Unwind_Exception _Unwind_Control_Block /* Alias */ #else struct _Unwind_Exception; typedef struct _Unwind_Exception _Unwind_Exception; #endif typedef enum { _URC_NO_REASON = 0, #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ !defined(__ARM_DWARF_EH__) && !defined(__SEH__) _URC_OK = 0, /* used by ARM EHABI */ #endif _URC_FOREIGN_EXCEPTION_CAUGHT = 1, _URC_FATAL_PHASE2_ERROR = 2, _URC_FATAL_PHASE1_ERROR = 3, _URC_NORMAL_STOP = 4, _URC_END_OF_STACK = 5, _URC_HANDLER_FOUND = 6, _URC_INSTALL_CONTEXT = 7, _URC_CONTINUE_UNWIND = 8, #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ !defined(__ARM_DWARF_EH__) && !defined(__SEH__) _URC_FAILURE = 9 /* used by ARM EHABI */ #endif } _Unwind_Reason_Code; typedef enum { _UA_SEARCH_PHASE = 1, _UA_CLEANUP_PHASE = 2, _UA_HANDLER_FRAME = 4, _UA_FORCE_UNWIND = 8, _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */ } _Unwind_Action; typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, _Unwind_Exception *); #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || \ defined(__ARM_DWARF_EH__) || defined(__SEH__)) typedef struct _Unwind_Control_Block _Unwind_Control_Block; typedef uint32_t _Unwind_EHT_Header; struct _Unwind_Control_Block { uint64_t exception_class; void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *); /* unwinder cache (private fields for the unwinder's use) */ struct { uint32_t reserved1; /* forced unwind stop function, 0 if not forced */ uint32_t reserved2; /* personality routine */ uint32_t reserved3; /* callsite */ uint32_t reserved4; /* forced unwind stop argument */ uint32_t reserved5; } unwinder_cache; /* propagation barrier cache (valid after phase 1) */ struct { uint32_t sp; uint32_t bitpattern[5]; } barrier_cache; /* cleanup cache (preserved over cleanup) */ struct { uint32_t bitpattern[4]; } cleanup_cache; /* personality cache (for personality's benefit) */ struct { uint32_t 
fnstart; /* function start address */ _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */ uint32_t additional; /* additional data */ uint32_t reserved1; } pr_cache; long long int : 0; /* force alignment of next item to 8-byte boundary */ } __attribute__((__aligned__(8))); #else struct _Unwind_Exception { _Unwind_Exception_Class exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; #if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__) _Unwind_Word private_[6]; #else _Unwind_Word private_1; _Unwind_Word private_2; #endif /* The Itanium ABI requires that _Unwind_Exception objects are "double-word * aligned". GCC has interpreted this to mean "use the maximum useful * alignment for the target"; so do we. */ } __attribute__((__aligned__)); #endif typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, _Unwind_Exception *, struct _Unwind_Context *, void *); typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, _Unwind_Exception *, struct _Unwind_Context *); typedef _Unwind_Personality_Fn __personality_routine; typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, void *); #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || \ defined(__ARM_DWARF_EH__) || defined(__SEH__)) typedef enum { _UVRSC_CORE = 0, /* integer register */ _UVRSC_VFP = 1, /* vfp */ _UVRSC_WMMXD = 3, /* Intel WMMX data register */ _UVRSC_WMMXC = 4, /* Intel WMMX control register */ _UVRSC_PSEUDO = 5 /* Special purpose pseudo register */ } _Unwind_VRS_RegClass; typedef enum { _UVRSD_UINT32 = 0, _UVRSD_VFPX = 1, _UVRSD_UINT64 = 3, _UVRSD_FLOAT = 4, _UVRSD_DOUBLE = 5 } _Unwind_VRS_DataRepresentation; typedef enum { _UVRSR_OK = 0, _UVRSR_NOT_IMPLEMENTED = 1, _UVRSR_FAILED = 2 } _Unwind_VRS_Result; typedef uint32_t _Unwind_State; #define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0) #define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1) #define _US_UNWIND_FRAME_RESUME 
((_Unwind_State)2) #define _US_ACTION_MASK ((_Unwind_State)3) #define _US_FORCE_UNWIND ((_Unwind_State)8) _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, uint32_t __regno, _Unwind_VRS_DataRepresentation __representation, void *__valuep); _Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, uint32_t __regno, _Unwind_VRS_DataRepresentation __representation, void *__valuep); static __inline__ _Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) { _Unwind_Word __value; _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); return __value; } static __inline__ void _Unwind_SetGR(struct _Unwind_Context *__context, int __index, _Unwind_Word __value) { _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); } static __inline__ _Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) { _Unwind_Word __ip = _Unwind_GetGR(__context, 15); return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. 
*/ } static __inline__ void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) { _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1; _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit); } #else _Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int); void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word); _Unwind_Word _Unwind_GetIP(struct _Unwind_Context *); void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word); #endif _Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); _Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); _Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *); void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); /* DWARF EH functions; currently not available on Darwin/ARM */ #if !defined(__APPLE__) || !defined(__arm__) _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, void *); void _Unwind_DeleteException(_Unwind_Exception *); void _Unwind_Resume(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *); #endif _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); /* setjmp(3)/longjmp(3) stuff */ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); _Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, void *); void _Unwind_SjLj_Resume(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *); void *_Unwind_FindEnclosingFunction(void *); #ifdef __APPLE__ _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) __attribute__((__unavailable__)); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) 
__attribute__((__unavailable__)); /* Darwin-specific functions */ void __register_frame(const void *); void __deregister_frame(const void *); struct dwarf_eh_bases { uintptr_t tbase; uintptr_t dbase; uintptr_t func; }; void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); void __register_frame_info_bases(const void *, void *, void *, void *) __attribute__((__unavailable__)); void __register_frame_info(const void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table_bases(const void *, void*, void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table(const void *, void *) __attribute__((__unavailable__)); void __register_frame_table(const void *) __attribute__((__unavailable__)); void __deregister_frame_info(const void *) __attribute__((__unavailable__)); void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__)); #else _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *); #endif #ifndef HIDE_EXPORTS #pragma GCC visibility pop #endif #ifdef __cplusplus } #endif #endif /* __CLANG_UNWIND_H */ #endif /* ===-------- vadefs.h ---------------------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ /* Only include this if we are aiming for MSVC compatibility. */ #ifndef _MSC_VER #include_next #else #ifndef __clang_vadefs_h #define __clang_vadefs_h #include_next /* Override macros from vadefs.h with definitions that work with Clang. 
*/ #ifdef _crt_va_start #undef _crt_va_start #define _crt_va_start(ap, param) __builtin_va_start(ap, param) #endif #ifdef _crt_va_end #undef _crt_va_end #define _crt_va_end(ap) __builtin_va_end(ap) #endif #ifdef _crt_va_arg #undef _crt_va_arg #define _crt_va_arg(ap, type) __builtin_va_arg(ap, type) #endif /* VS 2015 switched to double underscore names, which is an improvement, but now * we have to intercept those names too. */ #ifdef __crt_va_start #undef __crt_va_start #define __crt_va_start(ap, param) __builtin_va_start(ap, param) #endif #ifdef __crt_va_end #undef __crt_va_end #define __crt_va_end(ap) __builtin_va_end(ap) #endif #ifdef __crt_va_arg #undef __crt_va_arg #define __crt_va_arg(ap, type) __builtin_va_arg(ap, type) #endif #endif #endif /*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __VAESINTRIN_H #define __VAESINTRIN_H /* Default attributes for YMM forms. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256))) /* Default attributes for ZMM forms. 
*/ #define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512))) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesenc_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesenc256((__v4di) __A, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesdec_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesdec256((__v4di) __A, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesenclast_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A, (__v4di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A, (__v4di) __B); } #ifdef __AVX512FINTRIN_H static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesenc_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesenc512((__v8di) __A, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesdec_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesdec512((__v8di) __A, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesenclast_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A, (__v8di) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS_F _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A, (__v8di) __B); } #endif // __AVX512FINTRIN_H #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_F #endif // __VAESINTRIN_H /*===---- varargs.h - Variable argument handling -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __VARARGS_H #define __VARARGS_H #error "Please use instead of " #endif /*===---- velintrin.h - VEL intrinsics for VE ------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __VEL_INTRIN_H__ #define __VEL_INTRIN_H__ // Vector registers typedef double __vr __attribute__((__vector_size__(2048))); // Vector mask registers #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // For C99 typedef _Bool __vm __attribute__((ext_vector_type(256))); typedef _Bool __vm256 __attribute__((ext_vector_type(256))); typedef _Bool __vm512 __attribute__((ext_vector_type(512))); #else #ifdef __cplusplus // For C++ typedef bool __vm __attribute__((ext_vector_type(256))); typedef bool __vm256 __attribute__((ext_vector_type(256))); typedef bool __vm512 __attribute__((ext_vector_type(512))); #else #error need C++ or C99 to use vector intrinsics for VE #endif #endif enum VShuffleCodes { VE_VSHUFFLE_YUYU = 0, VE_VSHUFFLE_YUYL = 1, VE_VSHUFFLE_YUZU = 2, VE_VSHUFFLE_YUZL = 3, VE_VSHUFFLE_YLYU = 4, VE_VSHUFFLE_YLYL = 5, VE_VSHUFFLE_YLZU = 6, VE_VSHUFFLE_YLZL = 7, VE_VSHUFFLE_ZUYU = 8, VE_VSHUFFLE_ZUYL = 9, VE_VSHUFFLE_ZUZU = 10, VE_VSHUFFLE_ZUZL = 11, VE_VSHUFFLE_ZLYU = 12, VE_VSHUFFLE_ZLYL = 13, VE_VSHUFFLE_ZLZU = 14, VE_VSHUFFLE_ZLZL = 15, }; // Use generated intrinsic name definitions #include // Use helper functions #include // pack #define _vel_pack_f32p __builtin_ve_vl_pack_f32p #define _vel_pack_f32a __builtin_ve_vl_pack_f32a static inline unsigned long int _vel_pack_i32(unsigned int a, unsigned int b) { return (((unsigned long int)a) << 32) | b; } #define 
_vel_extract_vm512u(vm) __builtin_ve_vl_extract_vm512u(vm) #define _vel_extract_vm512l(vm) __builtin_ve_vl_extract_vm512l(vm) #define _vel_insert_vm512u(vm512, vm) __builtin_ve_vl_insert_vm512u(vm512, vm) #define _vel_insert_vm512l(vm512, vm) __builtin_ve_vl_insert_vm512l(vm512, vm) #endif /*===---- velintrin_approx.h - VEL intrinsics helper for VE ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __VEL_INTRIN_APPROX_H__ #define __VEL_INTRIN_APPROX_H__ static inline __vr _vel_approx_vfdivs_vvvl(__vr v0, __vr v1, int l) { float s0; __vr v2, v3, v4, v5; v5 = _vel_vrcps_vvl(v1, l); s0 = 1.0; v4 = _vel_vfnmsbs_vsvvl(s0, v1, v5, l); v3 = _vel_vfmads_vvvvl(v5, v5, v4, l); v2 = _vel_vfmuls_vvvl(v0, v3, l); v4 = _vel_vfnmsbs_vvvvl(v0, v2, v1, l); v2 = _vel_vfmads_vvvvl(v2, v5, v4, l); v0 = _vel_vfnmsbs_vvvvl(v0, v2, v1, l); v0 = _vel_vfmads_vvvvl(v2, v3, v0, l); return v0; } static inline __vr _vel_approx_pvfdiv_vvvl(__vr v0, __vr v1, int l) { float s0; __vr v2, v3, v4, v5; v5 = _vel_pvrcp_vvl(v1, l); s0 = 1.0; v4 = _vel_pvfnmsb_vsvvl(s0, v1, v5, l); v3 = _vel_pvfmad_vvvvl(v5, v5, v4, l); v2 = _vel_pvfmul_vvvl(v0, v3, l); v4 = _vel_pvfnmsb_vvvvl(v0, v2, v1, l); v2 = _vel_pvfmad_vvvvl(v2, v5, v4, l); v0 = _vel_pvfnmsb_vvvvl(v0, v2, v1, l); v0 = _vel_pvfmad_vvvvl(v2, v3, v0, l); return v0; } static inline __vr _vel_approx_vfdivs_vsvl(float s0, __vr v0, int l) { float s1; __vr v1, v2, v3, v4; v4 = _vel_vrcps_vvl(v0, l); s1 = 1.0; v2 = _vel_vfnmsbs_vsvvl(s1, v0, v4, l); v2 = _vel_vfmads_vvvvl(v4, v4, v2, l); v1 = _vel_vfmuls_vsvl(s0, v2, l); v3 = _vel_vfnmsbs_vsvvl(s0, v1, v0, l); v1 = _vel_vfmads_vvvvl(v1, v4, v3, l); v3 = _vel_vfnmsbs_vsvvl(s0, v1, v0, l); v0 = _vel_vfmads_vvvvl(v1, v2, v3, l); return v0; } static 
inline __vr _vel_approx_vfdivs_vvsl(__vr v0, float s0, int l) { float s1; __vr v1, v2; s1 = 1.0f / s0; v1 = _vel_vfmuls_vsvl(s1, v0, l); v2 = _vel_vfnmsbs_vvsvl(v0, s0, v1, l); v0 = _vel_vfmads_vvsvl(v1, s1, v2, l); return v0; } static inline __vr _vel_approx_vfdivd_vsvl(double s0, __vr v0, int l) { __vr v1, v2, v3; v2 = _vel_vrcpd_vvl(v0, l); double s1 = 1.0; v3 = _vel_vfnmsbd_vsvvl(s1, v0, v2, l); v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); v1 = _vel_vfnmsbd_vsvvl(s1, v0, v2, l); v1 = _vel_vfmadd_vvvvl(v2, v2, v1, l); v1 = _vel_vaddul_vsvl(1, v1, l); v3 = _vel_vfnmsbd_vsvvl(s1, v0, v1, l); v3 = _vel_vfmadd_vvvvl(v1, v1, v3, l); v1 = _vel_vfmuld_vsvl(s0, v3, l); v0 = _vel_vfnmsbd_vsvvl(s0, v1, v0, l); v0 = _vel_vfmadd_vvvvl(v1, v3, v0, l); return v0; } static inline __vr _vel_approx_vfsqrtd_vvl(__vr v0, int l) { double s0, s1; __vr v1, v2, v3; v2 = _vel_vrsqrtdnex_vvl(v0, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); s0 = 1.0; s1 = 0.5; v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v0 = _vel_vfmadd_vvvvl(v1, v1, v3, l); return v0; } static inline __vr _vel_approx_vfsqrts_vvl(__vr v0, int l) { float s0, s1; __vr v1, v2, v3; v0 = _vel_vcvtds_vvl(v0, l); v2 = _vel_vrsqrtdnex_vvl(v0, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); s0 = 1.0; s1 = 0.5; v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); v1 = _vel_vfmuld_vvvl(v0, v2, l); v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); v3 = _vel_vfmuld_vsvl(s1, v3, l); v0 = _vel_vfmadd_vvvvl(v1, v1, v3, l); v0 = _vel_vcvtsd_vvl(v0, l); return v0; } #endif #define _vel_vld_vssl __builtin_ve_vl_vld_vssl #define _vel_vld_vssvl __builtin_ve_vl_vld_vssvl #define _vel_vldnc_vssl __builtin_ve_vl_vldnc_vssl #define _vel_vldnc_vssvl __builtin_ve_vl_vldnc_vssvl #define _vel_vldu_vssl __builtin_ve_vl_vldu_vssl #define 
_vel_vldu_vssvl __builtin_ve_vl_vldu_vssvl #define _vel_vldunc_vssl __builtin_ve_vl_vldunc_vssl #define _vel_vldunc_vssvl __builtin_ve_vl_vldunc_vssvl #define _vel_vldlsx_vssl __builtin_ve_vl_vldlsx_vssl #define _vel_vldlsx_vssvl __builtin_ve_vl_vldlsx_vssvl #define _vel_vldlsxnc_vssl __builtin_ve_vl_vldlsxnc_vssl #define _vel_vldlsxnc_vssvl __builtin_ve_vl_vldlsxnc_vssvl #define _vel_vldlzx_vssl __builtin_ve_vl_vldlzx_vssl #define _vel_vldlzx_vssvl __builtin_ve_vl_vldlzx_vssvl #define _vel_vldlzxnc_vssl __builtin_ve_vl_vldlzxnc_vssl #define _vel_vldlzxnc_vssvl __builtin_ve_vl_vldlzxnc_vssvl #define _vel_vld2d_vssl __builtin_ve_vl_vld2d_vssl #define _vel_vld2d_vssvl __builtin_ve_vl_vld2d_vssvl #define _vel_vld2dnc_vssl __builtin_ve_vl_vld2dnc_vssl #define _vel_vld2dnc_vssvl __builtin_ve_vl_vld2dnc_vssvl #define _vel_vldu2d_vssl __builtin_ve_vl_vldu2d_vssl #define _vel_vldu2d_vssvl __builtin_ve_vl_vldu2d_vssvl #define _vel_vldu2dnc_vssl __builtin_ve_vl_vldu2dnc_vssl #define _vel_vldu2dnc_vssvl __builtin_ve_vl_vldu2dnc_vssvl #define _vel_vldl2dsx_vssl __builtin_ve_vl_vldl2dsx_vssl #define _vel_vldl2dsx_vssvl __builtin_ve_vl_vldl2dsx_vssvl #define _vel_vldl2dsxnc_vssl __builtin_ve_vl_vldl2dsxnc_vssl #define _vel_vldl2dsxnc_vssvl __builtin_ve_vl_vldl2dsxnc_vssvl #define _vel_vldl2dzx_vssl __builtin_ve_vl_vldl2dzx_vssl #define _vel_vldl2dzx_vssvl __builtin_ve_vl_vldl2dzx_vssvl #define _vel_vldl2dzxnc_vssl __builtin_ve_vl_vldl2dzxnc_vssl #define _vel_vldl2dzxnc_vssvl __builtin_ve_vl_vldl2dzxnc_vssvl #define _vel_vst_vssl __builtin_ve_vl_vst_vssl #define _vel_vst_vssml __builtin_ve_vl_vst_vssml #define _vel_vstnc_vssl __builtin_ve_vl_vstnc_vssl #define _vel_vstnc_vssml __builtin_ve_vl_vstnc_vssml #define _vel_vstot_vssl __builtin_ve_vl_vstot_vssl #define _vel_vstot_vssml __builtin_ve_vl_vstot_vssml #define _vel_vstncot_vssl __builtin_ve_vl_vstncot_vssl #define _vel_vstncot_vssml __builtin_ve_vl_vstncot_vssml #define _vel_vstu_vssl __builtin_ve_vl_vstu_vssl #define 
_vel_vstu_vssml __builtin_ve_vl_vstu_vssml #define _vel_vstunc_vssl __builtin_ve_vl_vstunc_vssl #define _vel_vstunc_vssml __builtin_ve_vl_vstunc_vssml #define _vel_vstuot_vssl __builtin_ve_vl_vstuot_vssl #define _vel_vstuot_vssml __builtin_ve_vl_vstuot_vssml #define _vel_vstuncot_vssl __builtin_ve_vl_vstuncot_vssl #define _vel_vstuncot_vssml __builtin_ve_vl_vstuncot_vssml #define _vel_vstl_vssl __builtin_ve_vl_vstl_vssl #define _vel_vstl_vssml __builtin_ve_vl_vstl_vssml #define _vel_vstlnc_vssl __builtin_ve_vl_vstlnc_vssl #define _vel_vstlnc_vssml __builtin_ve_vl_vstlnc_vssml #define _vel_vstlot_vssl __builtin_ve_vl_vstlot_vssl #define _vel_vstlot_vssml __builtin_ve_vl_vstlot_vssml #define _vel_vstlncot_vssl __builtin_ve_vl_vstlncot_vssl #define _vel_vstlncot_vssml __builtin_ve_vl_vstlncot_vssml #define _vel_vst2d_vssl __builtin_ve_vl_vst2d_vssl #define _vel_vst2d_vssml __builtin_ve_vl_vst2d_vssml #define _vel_vst2dnc_vssl __builtin_ve_vl_vst2dnc_vssl #define _vel_vst2dnc_vssml __builtin_ve_vl_vst2dnc_vssml #define _vel_vst2dot_vssl __builtin_ve_vl_vst2dot_vssl #define _vel_vst2dot_vssml __builtin_ve_vl_vst2dot_vssml #define _vel_vst2dncot_vssl __builtin_ve_vl_vst2dncot_vssl #define _vel_vst2dncot_vssml __builtin_ve_vl_vst2dncot_vssml #define _vel_vstu2d_vssl __builtin_ve_vl_vstu2d_vssl #define _vel_vstu2d_vssml __builtin_ve_vl_vstu2d_vssml #define _vel_vstu2dnc_vssl __builtin_ve_vl_vstu2dnc_vssl #define _vel_vstu2dnc_vssml __builtin_ve_vl_vstu2dnc_vssml #define _vel_vstu2dot_vssl __builtin_ve_vl_vstu2dot_vssl #define _vel_vstu2dot_vssml __builtin_ve_vl_vstu2dot_vssml #define _vel_vstu2dncot_vssl __builtin_ve_vl_vstu2dncot_vssl #define _vel_vstu2dncot_vssml __builtin_ve_vl_vstu2dncot_vssml #define _vel_vstl2d_vssl __builtin_ve_vl_vstl2d_vssl #define _vel_vstl2d_vssml __builtin_ve_vl_vstl2d_vssml #define _vel_vstl2dnc_vssl __builtin_ve_vl_vstl2dnc_vssl #define _vel_vstl2dnc_vssml __builtin_ve_vl_vstl2dnc_vssml #define _vel_vstl2dot_vssl __builtin_ve_vl_vstl2dot_vssl 
#define _vel_vstl2dot_vssml __builtin_ve_vl_vstl2dot_vssml #define _vel_vstl2dncot_vssl __builtin_ve_vl_vstl2dncot_vssl #define _vel_vstl2dncot_vssml __builtin_ve_vl_vstl2dncot_vssml #define _vel_pfchv_ssl __builtin_ve_vl_pfchv_ssl #define _vel_pfchvnc_ssl __builtin_ve_vl_pfchvnc_ssl #define _vel_lsv_vvss __builtin_ve_vl_lsv_vvss #define _vel_lvsl_svs __builtin_ve_vl_lvsl_svs #define _vel_lvsd_svs __builtin_ve_vl_lvsd_svs #define _vel_lvss_svs __builtin_ve_vl_lvss_svs #define _vel_lvm_mmss __builtin_ve_vl_lvm_mmss #define _vel_lvm_MMss __builtin_ve_vl_lvm_MMss #define _vel_svm_sms __builtin_ve_vl_svm_sms #define _vel_svm_sMs __builtin_ve_vl_svm_sMs #define _vel_vbrdd_vsl __builtin_ve_vl_vbrdd_vsl #define _vel_vbrdd_vsvl __builtin_ve_vl_vbrdd_vsvl #define _vel_vbrdd_vsmvl __builtin_ve_vl_vbrdd_vsmvl #define _vel_vbrdl_vsl __builtin_ve_vl_vbrdl_vsl #define _vel_vbrdl_vsvl __builtin_ve_vl_vbrdl_vsvl #define _vel_vbrdl_vsmvl __builtin_ve_vl_vbrdl_vsmvl #define _vel_vbrds_vsl __builtin_ve_vl_vbrds_vsl #define _vel_vbrds_vsvl __builtin_ve_vl_vbrds_vsvl #define _vel_vbrds_vsmvl __builtin_ve_vl_vbrds_vsmvl #define _vel_vbrdw_vsl __builtin_ve_vl_vbrdw_vsl #define _vel_vbrdw_vsvl __builtin_ve_vl_vbrdw_vsvl #define _vel_vbrdw_vsmvl __builtin_ve_vl_vbrdw_vsmvl #define _vel_pvbrd_vsl __builtin_ve_vl_pvbrd_vsl #define _vel_pvbrd_vsvl __builtin_ve_vl_pvbrd_vsvl #define _vel_pvbrd_vsMvl __builtin_ve_vl_pvbrd_vsMvl #define _vel_vmv_vsvl __builtin_ve_vl_vmv_vsvl #define _vel_vmv_vsvvl __builtin_ve_vl_vmv_vsvvl #define _vel_vmv_vsvmvl __builtin_ve_vl_vmv_vsvmvl #define _vel_vaddul_vvvl __builtin_ve_vl_vaddul_vvvl #define _vel_vaddul_vvvvl __builtin_ve_vl_vaddul_vvvvl #define _vel_vaddul_vsvl __builtin_ve_vl_vaddul_vsvl #define _vel_vaddul_vsvvl __builtin_ve_vl_vaddul_vsvvl #define _vel_vaddul_vvvmvl __builtin_ve_vl_vaddul_vvvmvl #define _vel_vaddul_vsvmvl __builtin_ve_vl_vaddul_vsvmvl #define _vel_vadduw_vvvl __builtin_ve_vl_vadduw_vvvl #define _vel_vadduw_vvvvl 
__builtin_ve_vl_vadduw_vvvvl #define _vel_vadduw_vsvl __builtin_ve_vl_vadduw_vsvl #define _vel_vadduw_vsvvl __builtin_ve_vl_vadduw_vsvvl #define _vel_vadduw_vvvmvl __builtin_ve_vl_vadduw_vvvmvl #define _vel_vadduw_vsvmvl __builtin_ve_vl_vadduw_vsvmvl #define _vel_pvaddu_vvvl __builtin_ve_vl_pvaddu_vvvl #define _vel_pvaddu_vvvvl __builtin_ve_vl_pvaddu_vvvvl #define _vel_pvaddu_vsvl __builtin_ve_vl_pvaddu_vsvl #define _vel_pvaddu_vsvvl __builtin_ve_vl_pvaddu_vsvvl #define _vel_pvaddu_vvvMvl __builtin_ve_vl_pvaddu_vvvMvl #define _vel_pvaddu_vsvMvl __builtin_ve_vl_pvaddu_vsvMvl #define _vel_vaddswsx_vvvl __builtin_ve_vl_vaddswsx_vvvl #define _vel_vaddswsx_vvvvl __builtin_ve_vl_vaddswsx_vvvvl #define _vel_vaddswsx_vsvl __builtin_ve_vl_vaddswsx_vsvl #define _vel_vaddswsx_vsvvl __builtin_ve_vl_vaddswsx_vsvvl #define _vel_vaddswsx_vvvmvl __builtin_ve_vl_vaddswsx_vvvmvl #define _vel_vaddswsx_vsvmvl __builtin_ve_vl_vaddswsx_vsvmvl #define _vel_vaddswzx_vvvl __builtin_ve_vl_vaddswzx_vvvl #define _vel_vaddswzx_vvvvl __builtin_ve_vl_vaddswzx_vvvvl #define _vel_vaddswzx_vsvl __builtin_ve_vl_vaddswzx_vsvl #define _vel_vaddswzx_vsvvl __builtin_ve_vl_vaddswzx_vsvvl #define _vel_vaddswzx_vvvmvl __builtin_ve_vl_vaddswzx_vvvmvl #define _vel_vaddswzx_vsvmvl __builtin_ve_vl_vaddswzx_vsvmvl #define _vel_pvadds_vvvl __builtin_ve_vl_pvadds_vvvl #define _vel_pvadds_vvvvl __builtin_ve_vl_pvadds_vvvvl #define _vel_pvadds_vsvl __builtin_ve_vl_pvadds_vsvl #define _vel_pvadds_vsvvl __builtin_ve_vl_pvadds_vsvvl #define _vel_pvadds_vvvMvl __builtin_ve_vl_pvadds_vvvMvl #define _vel_pvadds_vsvMvl __builtin_ve_vl_pvadds_vsvMvl #define _vel_vaddsl_vvvl __builtin_ve_vl_vaddsl_vvvl #define _vel_vaddsl_vvvvl __builtin_ve_vl_vaddsl_vvvvl #define _vel_vaddsl_vsvl __builtin_ve_vl_vaddsl_vsvl #define _vel_vaddsl_vsvvl __builtin_ve_vl_vaddsl_vsvvl #define _vel_vaddsl_vvvmvl __builtin_ve_vl_vaddsl_vvvmvl #define _vel_vaddsl_vsvmvl __builtin_ve_vl_vaddsl_vsvmvl #define _vel_vsubul_vvvl 
__builtin_ve_vl_vsubul_vvvl #define _vel_vsubul_vvvvl __builtin_ve_vl_vsubul_vvvvl #define _vel_vsubul_vsvl __builtin_ve_vl_vsubul_vsvl #define _vel_vsubul_vsvvl __builtin_ve_vl_vsubul_vsvvl #define _vel_vsubul_vvvmvl __builtin_ve_vl_vsubul_vvvmvl #define _vel_vsubul_vsvmvl __builtin_ve_vl_vsubul_vsvmvl #define _vel_vsubuw_vvvl __builtin_ve_vl_vsubuw_vvvl #define _vel_vsubuw_vvvvl __builtin_ve_vl_vsubuw_vvvvl #define _vel_vsubuw_vsvl __builtin_ve_vl_vsubuw_vsvl #define _vel_vsubuw_vsvvl __builtin_ve_vl_vsubuw_vsvvl #define _vel_vsubuw_vvvmvl __builtin_ve_vl_vsubuw_vvvmvl #define _vel_vsubuw_vsvmvl __builtin_ve_vl_vsubuw_vsvmvl #define _vel_pvsubu_vvvl __builtin_ve_vl_pvsubu_vvvl #define _vel_pvsubu_vvvvl __builtin_ve_vl_pvsubu_vvvvl #define _vel_pvsubu_vsvl __builtin_ve_vl_pvsubu_vsvl #define _vel_pvsubu_vsvvl __builtin_ve_vl_pvsubu_vsvvl #define _vel_pvsubu_vvvMvl __builtin_ve_vl_pvsubu_vvvMvl #define _vel_pvsubu_vsvMvl __builtin_ve_vl_pvsubu_vsvMvl #define _vel_vsubswsx_vvvl __builtin_ve_vl_vsubswsx_vvvl #define _vel_vsubswsx_vvvvl __builtin_ve_vl_vsubswsx_vvvvl #define _vel_vsubswsx_vsvl __builtin_ve_vl_vsubswsx_vsvl #define _vel_vsubswsx_vsvvl __builtin_ve_vl_vsubswsx_vsvvl #define _vel_vsubswsx_vvvmvl __builtin_ve_vl_vsubswsx_vvvmvl #define _vel_vsubswsx_vsvmvl __builtin_ve_vl_vsubswsx_vsvmvl #define _vel_vsubswzx_vvvl __builtin_ve_vl_vsubswzx_vvvl #define _vel_vsubswzx_vvvvl __builtin_ve_vl_vsubswzx_vvvvl #define _vel_vsubswzx_vsvl __builtin_ve_vl_vsubswzx_vsvl #define _vel_vsubswzx_vsvvl __builtin_ve_vl_vsubswzx_vsvvl #define _vel_vsubswzx_vvvmvl __builtin_ve_vl_vsubswzx_vvvmvl #define _vel_vsubswzx_vsvmvl __builtin_ve_vl_vsubswzx_vsvmvl #define _vel_pvsubs_vvvl __builtin_ve_vl_pvsubs_vvvl #define _vel_pvsubs_vvvvl __builtin_ve_vl_pvsubs_vvvvl #define _vel_pvsubs_vsvl __builtin_ve_vl_pvsubs_vsvl #define _vel_pvsubs_vsvvl __builtin_ve_vl_pvsubs_vsvvl #define _vel_pvsubs_vvvMvl __builtin_ve_vl_pvsubs_vvvMvl #define _vel_pvsubs_vsvMvl 
__builtin_ve_vl_pvsubs_vsvMvl #define _vel_vsubsl_vvvl __builtin_ve_vl_vsubsl_vvvl #define _vel_vsubsl_vvvvl __builtin_ve_vl_vsubsl_vvvvl #define _vel_vsubsl_vsvl __builtin_ve_vl_vsubsl_vsvl #define _vel_vsubsl_vsvvl __builtin_ve_vl_vsubsl_vsvvl #define _vel_vsubsl_vvvmvl __builtin_ve_vl_vsubsl_vvvmvl #define _vel_vsubsl_vsvmvl __builtin_ve_vl_vsubsl_vsvmvl #define _vel_vmulul_vvvl __builtin_ve_vl_vmulul_vvvl #define _vel_vmulul_vvvvl __builtin_ve_vl_vmulul_vvvvl #define _vel_vmulul_vsvl __builtin_ve_vl_vmulul_vsvl #define _vel_vmulul_vsvvl __builtin_ve_vl_vmulul_vsvvl #define _vel_vmulul_vvvmvl __builtin_ve_vl_vmulul_vvvmvl #define _vel_vmulul_vsvmvl __builtin_ve_vl_vmulul_vsvmvl #define _vel_vmuluw_vvvl __builtin_ve_vl_vmuluw_vvvl #define _vel_vmuluw_vvvvl __builtin_ve_vl_vmuluw_vvvvl #define _vel_vmuluw_vsvl __builtin_ve_vl_vmuluw_vsvl #define _vel_vmuluw_vsvvl __builtin_ve_vl_vmuluw_vsvvl #define _vel_vmuluw_vvvmvl __builtin_ve_vl_vmuluw_vvvmvl #define _vel_vmuluw_vsvmvl __builtin_ve_vl_vmuluw_vsvmvl #define _vel_vmulswsx_vvvl __builtin_ve_vl_vmulswsx_vvvl #define _vel_vmulswsx_vvvvl __builtin_ve_vl_vmulswsx_vvvvl #define _vel_vmulswsx_vsvl __builtin_ve_vl_vmulswsx_vsvl #define _vel_vmulswsx_vsvvl __builtin_ve_vl_vmulswsx_vsvvl #define _vel_vmulswsx_vvvmvl __builtin_ve_vl_vmulswsx_vvvmvl #define _vel_vmulswsx_vsvmvl __builtin_ve_vl_vmulswsx_vsvmvl #define _vel_vmulswzx_vvvl __builtin_ve_vl_vmulswzx_vvvl #define _vel_vmulswzx_vvvvl __builtin_ve_vl_vmulswzx_vvvvl #define _vel_vmulswzx_vsvl __builtin_ve_vl_vmulswzx_vsvl #define _vel_vmulswzx_vsvvl __builtin_ve_vl_vmulswzx_vsvvl #define _vel_vmulswzx_vvvmvl __builtin_ve_vl_vmulswzx_vvvmvl #define _vel_vmulswzx_vsvmvl __builtin_ve_vl_vmulswzx_vsvmvl #define _vel_vmulsl_vvvl __builtin_ve_vl_vmulsl_vvvl #define _vel_vmulsl_vvvvl __builtin_ve_vl_vmulsl_vvvvl #define _vel_vmulsl_vsvl __builtin_ve_vl_vmulsl_vsvl #define _vel_vmulsl_vsvvl __builtin_ve_vl_vmulsl_vsvvl #define _vel_vmulsl_vvvmvl 
__builtin_ve_vl_vmulsl_vvvmvl #define _vel_vmulsl_vsvmvl __builtin_ve_vl_vmulsl_vsvmvl #define _vel_vmulslw_vvvl __builtin_ve_vl_vmulslw_vvvl #define _vel_vmulslw_vvvvl __builtin_ve_vl_vmulslw_vvvvl #define _vel_vmulslw_vsvl __builtin_ve_vl_vmulslw_vsvl #define _vel_vmulslw_vsvvl __builtin_ve_vl_vmulslw_vsvvl #define _vel_vdivul_vvvl __builtin_ve_vl_vdivul_vvvl #define _vel_vdivul_vvvvl __builtin_ve_vl_vdivul_vvvvl #define _vel_vdivul_vsvl __builtin_ve_vl_vdivul_vsvl #define _vel_vdivul_vsvvl __builtin_ve_vl_vdivul_vsvvl #define _vel_vdivul_vvvmvl __builtin_ve_vl_vdivul_vvvmvl #define _vel_vdivul_vsvmvl __builtin_ve_vl_vdivul_vsvmvl #define _vel_vdivuw_vvvl __builtin_ve_vl_vdivuw_vvvl #define _vel_vdivuw_vvvvl __builtin_ve_vl_vdivuw_vvvvl #define _vel_vdivuw_vsvl __builtin_ve_vl_vdivuw_vsvl #define _vel_vdivuw_vsvvl __builtin_ve_vl_vdivuw_vsvvl #define _vel_vdivuw_vvvmvl __builtin_ve_vl_vdivuw_vvvmvl #define _vel_vdivuw_vsvmvl __builtin_ve_vl_vdivuw_vsvmvl #define _vel_vdivul_vvsl __builtin_ve_vl_vdivul_vvsl #define _vel_vdivul_vvsvl __builtin_ve_vl_vdivul_vvsvl #define _vel_vdivul_vvsmvl __builtin_ve_vl_vdivul_vvsmvl #define _vel_vdivuw_vvsl __builtin_ve_vl_vdivuw_vvsl #define _vel_vdivuw_vvsvl __builtin_ve_vl_vdivuw_vvsvl #define _vel_vdivuw_vvsmvl __builtin_ve_vl_vdivuw_vvsmvl #define _vel_vdivswsx_vvvl __builtin_ve_vl_vdivswsx_vvvl #define _vel_vdivswsx_vvvvl __builtin_ve_vl_vdivswsx_vvvvl #define _vel_vdivswsx_vsvl __builtin_ve_vl_vdivswsx_vsvl #define _vel_vdivswsx_vsvvl __builtin_ve_vl_vdivswsx_vsvvl #define _vel_vdivswsx_vvvmvl __builtin_ve_vl_vdivswsx_vvvmvl #define _vel_vdivswsx_vsvmvl __builtin_ve_vl_vdivswsx_vsvmvl #define _vel_vdivswzx_vvvl __builtin_ve_vl_vdivswzx_vvvl #define _vel_vdivswzx_vvvvl __builtin_ve_vl_vdivswzx_vvvvl #define _vel_vdivswzx_vsvl __builtin_ve_vl_vdivswzx_vsvl #define _vel_vdivswzx_vsvvl __builtin_ve_vl_vdivswzx_vsvvl #define _vel_vdivswzx_vvvmvl __builtin_ve_vl_vdivswzx_vvvmvl #define _vel_vdivswzx_vsvmvl 
__builtin_ve_vl_vdivswzx_vsvmvl #define _vel_vdivswsx_vvsl __builtin_ve_vl_vdivswsx_vvsl #define _vel_vdivswsx_vvsvl __builtin_ve_vl_vdivswsx_vvsvl #define _vel_vdivswsx_vvsmvl __builtin_ve_vl_vdivswsx_vvsmvl #define _vel_vdivswzx_vvsl __builtin_ve_vl_vdivswzx_vvsl #define _vel_vdivswzx_vvsvl __builtin_ve_vl_vdivswzx_vvsvl #define _vel_vdivswzx_vvsmvl __builtin_ve_vl_vdivswzx_vvsmvl #define _vel_vdivsl_vvvl __builtin_ve_vl_vdivsl_vvvl #define _vel_vdivsl_vvvvl __builtin_ve_vl_vdivsl_vvvvl #define _vel_vdivsl_vsvl __builtin_ve_vl_vdivsl_vsvl #define _vel_vdivsl_vsvvl __builtin_ve_vl_vdivsl_vsvvl #define _vel_vdivsl_vvvmvl __builtin_ve_vl_vdivsl_vvvmvl #define _vel_vdivsl_vsvmvl __builtin_ve_vl_vdivsl_vsvmvl #define _vel_vdivsl_vvsl __builtin_ve_vl_vdivsl_vvsl #define _vel_vdivsl_vvsvl __builtin_ve_vl_vdivsl_vvsvl #define _vel_vdivsl_vvsmvl __builtin_ve_vl_vdivsl_vvsmvl #define _vel_vcmpul_vvvl __builtin_ve_vl_vcmpul_vvvl #define _vel_vcmpul_vvvvl __builtin_ve_vl_vcmpul_vvvvl #define _vel_vcmpul_vsvl __builtin_ve_vl_vcmpul_vsvl #define _vel_vcmpul_vsvvl __builtin_ve_vl_vcmpul_vsvvl #define _vel_vcmpul_vvvmvl __builtin_ve_vl_vcmpul_vvvmvl #define _vel_vcmpul_vsvmvl __builtin_ve_vl_vcmpul_vsvmvl #define _vel_vcmpuw_vvvl __builtin_ve_vl_vcmpuw_vvvl #define _vel_vcmpuw_vvvvl __builtin_ve_vl_vcmpuw_vvvvl #define _vel_vcmpuw_vsvl __builtin_ve_vl_vcmpuw_vsvl #define _vel_vcmpuw_vsvvl __builtin_ve_vl_vcmpuw_vsvvl #define _vel_vcmpuw_vvvmvl __builtin_ve_vl_vcmpuw_vvvmvl #define _vel_vcmpuw_vsvmvl __builtin_ve_vl_vcmpuw_vsvmvl #define _vel_pvcmpu_vvvl __builtin_ve_vl_pvcmpu_vvvl #define _vel_pvcmpu_vvvvl __builtin_ve_vl_pvcmpu_vvvvl #define _vel_pvcmpu_vsvl __builtin_ve_vl_pvcmpu_vsvl #define _vel_pvcmpu_vsvvl __builtin_ve_vl_pvcmpu_vsvvl #define _vel_pvcmpu_vvvMvl __builtin_ve_vl_pvcmpu_vvvMvl #define _vel_pvcmpu_vsvMvl __builtin_ve_vl_pvcmpu_vsvMvl #define _vel_vcmpswsx_vvvl __builtin_ve_vl_vcmpswsx_vvvl #define _vel_vcmpswsx_vvvvl __builtin_ve_vl_vcmpswsx_vvvvl #define 
_vel_vcmpswsx_vsvl __builtin_ve_vl_vcmpswsx_vsvl #define _vel_vcmpswsx_vsvvl __builtin_ve_vl_vcmpswsx_vsvvl #define _vel_vcmpswsx_vvvmvl __builtin_ve_vl_vcmpswsx_vvvmvl #define _vel_vcmpswsx_vsvmvl __builtin_ve_vl_vcmpswsx_vsvmvl #define _vel_vcmpswzx_vvvl __builtin_ve_vl_vcmpswzx_vvvl #define _vel_vcmpswzx_vvvvl __builtin_ve_vl_vcmpswzx_vvvvl #define _vel_vcmpswzx_vsvl __builtin_ve_vl_vcmpswzx_vsvl #define _vel_vcmpswzx_vsvvl __builtin_ve_vl_vcmpswzx_vsvvl #define _vel_vcmpswzx_vvvmvl __builtin_ve_vl_vcmpswzx_vvvmvl #define _vel_vcmpswzx_vsvmvl __builtin_ve_vl_vcmpswzx_vsvmvl #define _vel_pvcmps_vvvl __builtin_ve_vl_pvcmps_vvvl #define _vel_pvcmps_vvvvl __builtin_ve_vl_pvcmps_vvvvl #define _vel_pvcmps_vsvl __builtin_ve_vl_pvcmps_vsvl #define _vel_pvcmps_vsvvl __builtin_ve_vl_pvcmps_vsvvl #define _vel_pvcmps_vvvMvl __builtin_ve_vl_pvcmps_vvvMvl #define _vel_pvcmps_vsvMvl __builtin_ve_vl_pvcmps_vsvMvl #define _vel_vcmpsl_vvvl __builtin_ve_vl_vcmpsl_vvvl #define _vel_vcmpsl_vvvvl __builtin_ve_vl_vcmpsl_vvvvl #define _vel_vcmpsl_vsvl __builtin_ve_vl_vcmpsl_vsvl #define _vel_vcmpsl_vsvvl __builtin_ve_vl_vcmpsl_vsvvl #define _vel_vcmpsl_vvvmvl __builtin_ve_vl_vcmpsl_vvvmvl #define _vel_vcmpsl_vsvmvl __builtin_ve_vl_vcmpsl_vsvmvl #define _vel_vmaxswsx_vvvl __builtin_ve_vl_vmaxswsx_vvvl #define _vel_vmaxswsx_vvvvl __builtin_ve_vl_vmaxswsx_vvvvl #define _vel_vmaxswsx_vsvl __builtin_ve_vl_vmaxswsx_vsvl #define _vel_vmaxswsx_vsvvl __builtin_ve_vl_vmaxswsx_vsvvl #define _vel_vmaxswsx_vvvmvl __builtin_ve_vl_vmaxswsx_vvvmvl #define _vel_vmaxswsx_vsvmvl __builtin_ve_vl_vmaxswsx_vsvmvl #define _vel_vmaxswzx_vvvl __builtin_ve_vl_vmaxswzx_vvvl #define _vel_vmaxswzx_vvvvl __builtin_ve_vl_vmaxswzx_vvvvl #define _vel_vmaxswzx_vsvl __builtin_ve_vl_vmaxswzx_vsvl #define _vel_vmaxswzx_vsvvl __builtin_ve_vl_vmaxswzx_vsvvl #define _vel_vmaxswzx_vvvmvl __builtin_ve_vl_vmaxswzx_vvvmvl #define _vel_vmaxswzx_vsvmvl __builtin_ve_vl_vmaxswzx_vsvmvl #define _vel_pvmaxs_vvvl 
__builtin_ve_vl_pvmaxs_vvvl #define _vel_pvmaxs_vvvvl __builtin_ve_vl_pvmaxs_vvvvl #define _vel_pvmaxs_vsvl __builtin_ve_vl_pvmaxs_vsvl #define _vel_pvmaxs_vsvvl __builtin_ve_vl_pvmaxs_vsvvl #define _vel_pvmaxs_vvvMvl __builtin_ve_vl_pvmaxs_vvvMvl #define _vel_pvmaxs_vsvMvl __builtin_ve_vl_pvmaxs_vsvMvl #define _vel_vminswsx_vvvl __builtin_ve_vl_vminswsx_vvvl #define _vel_vminswsx_vvvvl __builtin_ve_vl_vminswsx_vvvvl #define _vel_vminswsx_vsvl __builtin_ve_vl_vminswsx_vsvl #define _vel_vminswsx_vsvvl __builtin_ve_vl_vminswsx_vsvvl #define _vel_vminswsx_vvvmvl __builtin_ve_vl_vminswsx_vvvmvl #define _vel_vminswsx_vsvmvl __builtin_ve_vl_vminswsx_vsvmvl #define _vel_vminswzx_vvvl __builtin_ve_vl_vminswzx_vvvl #define _vel_vminswzx_vvvvl __builtin_ve_vl_vminswzx_vvvvl #define _vel_vminswzx_vsvl __builtin_ve_vl_vminswzx_vsvl #define _vel_vminswzx_vsvvl __builtin_ve_vl_vminswzx_vsvvl #define _vel_vminswzx_vvvmvl __builtin_ve_vl_vminswzx_vvvmvl #define _vel_vminswzx_vsvmvl __builtin_ve_vl_vminswzx_vsvmvl #define _vel_pvmins_vvvl __builtin_ve_vl_pvmins_vvvl #define _vel_pvmins_vvvvl __builtin_ve_vl_pvmins_vvvvl #define _vel_pvmins_vsvl __builtin_ve_vl_pvmins_vsvl #define _vel_pvmins_vsvvl __builtin_ve_vl_pvmins_vsvvl #define _vel_pvmins_vvvMvl __builtin_ve_vl_pvmins_vvvMvl #define _vel_pvmins_vsvMvl __builtin_ve_vl_pvmins_vsvMvl #define _vel_vmaxsl_vvvl __builtin_ve_vl_vmaxsl_vvvl #define _vel_vmaxsl_vvvvl __builtin_ve_vl_vmaxsl_vvvvl #define _vel_vmaxsl_vsvl __builtin_ve_vl_vmaxsl_vsvl #define _vel_vmaxsl_vsvvl __builtin_ve_vl_vmaxsl_vsvvl #define _vel_vmaxsl_vvvmvl __builtin_ve_vl_vmaxsl_vvvmvl #define _vel_vmaxsl_vsvmvl __builtin_ve_vl_vmaxsl_vsvmvl #define _vel_vminsl_vvvl __builtin_ve_vl_vminsl_vvvl #define _vel_vminsl_vvvvl __builtin_ve_vl_vminsl_vvvvl #define _vel_vminsl_vsvl __builtin_ve_vl_vminsl_vsvl #define _vel_vminsl_vsvvl __builtin_ve_vl_vminsl_vsvvl #define _vel_vminsl_vvvmvl __builtin_ve_vl_vminsl_vvvmvl #define _vel_vminsl_vsvmvl 
__builtin_ve_vl_vminsl_vsvmvl #define _vel_vand_vvvl __builtin_ve_vl_vand_vvvl #define _vel_vand_vvvvl __builtin_ve_vl_vand_vvvvl #define _vel_vand_vsvl __builtin_ve_vl_vand_vsvl #define _vel_vand_vsvvl __builtin_ve_vl_vand_vsvvl #define _vel_vand_vvvmvl __builtin_ve_vl_vand_vvvmvl #define _vel_vand_vsvmvl __builtin_ve_vl_vand_vsvmvl #define _vel_pvand_vvvl __builtin_ve_vl_pvand_vvvl #define _vel_pvand_vvvvl __builtin_ve_vl_pvand_vvvvl #define _vel_pvand_vsvl __builtin_ve_vl_pvand_vsvl #define _vel_pvand_vsvvl __builtin_ve_vl_pvand_vsvvl #define _vel_pvand_vvvMvl __builtin_ve_vl_pvand_vvvMvl #define _vel_pvand_vsvMvl __builtin_ve_vl_pvand_vsvMvl #define _vel_vor_vvvl __builtin_ve_vl_vor_vvvl #define _vel_vor_vvvvl __builtin_ve_vl_vor_vvvvl #define _vel_vor_vsvl __builtin_ve_vl_vor_vsvl #define _vel_vor_vsvvl __builtin_ve_vl_vor_vsvvl #define _vel_vor_vvvmvl __builtin_ve_vl_vor_vvvmvl #define _vel_vor_vsvmvl __builtin_ve_vl_vor_vsvmvl #define _vel_pvor_vvvl __builtin_ve_vl_pvor_vvvl #define _vel_pvor_vvvvl __builtin_ve_vl_pvor_vvvvl #define _vel_pvor_vsvl __builtin_ve_vl_pvor_vsvl #define _vel_pvor_vsvvl __builtin_ve_vl_pvor_vsvvl #define _vel_pvor_vvvMvl __builtin_ve_vl_pvor_vvvMvl #define _vel_pvor_vsvMvl __builtin_ve_vl_pvor_vsvMvl #define _vel_vxor_vvvl __builtin_ve_vl_vxor_vvvl #define _vel_vxor_vvvvl __builtin_ve_vl_vxor_vvvvl #define _vel_vxor_vsvl __builtin_ve_vl_vxor_vsvl #define _vel_vxor_vsvvl __builtin_ve_vl_vxor_vsvvl #define _vel_vxor_vvvmvl __builtin_ve_vl_vxor_vvvmvl #define _vel_vxor_vsvmvl __builtin_ve_vl_vxor_vsvmvl #define _vel_pvxor_vvvl __builtin_ve_vl_pvxor_vvvl #define _vel_pvxor_vvvvl __builtin_ve_vl_pvxor_vvvvl #define _vel_pvxor_vsvl __builtin_ve_vl_pvxor_vsvl #define _vel_pvxor_vsvvl __builtin_ve_vl_pvxor_vsvvl #define _vel_pvxor_vvvMvl __builtin_ve_vl_pvxor_vvvMvl #define _vel_pvxor_vsvMvl __builtin_ve_vl_pvxor_vsvMvl #define _vel_veqv_vvvl __builtin_ve_vl_veqv_vvvl #define _vel_veqv_vvvvl __builtin_ve_vl_veqv_vvvvl #define 
_vel_veqv_vsvl __builtin_ve_vl_veqv_vsvl #define _vel_veqv_vsvvl __builtin_ve_vl_veqv_vsvvl #define _vel_veqv_vvvmvl __builtin_ve_vl_veqv_vvvmvl #define _vel_veqv_vsvmvl __builtin_ve_vl_veqv_vsvmvl #define _vel_pveqv_vvvl __builtin_ve_vl_pveqv_vvvl #define _vel_pveqv_vvvvl __builtin_ve_vl_pveqv_vvvvl #define _vel_pveqv_vsvl __builtin_ve_vl_pveqv_vsvl #define _vel_pveqv_vsvvl __builtin_ve_vl_pveqv_vsvvl #define _vel_pveqv_vvvMvl __builtin_ve_vl_pveqv_vvvMvl #define _vel_pveqv_vsvMvl __builtin_ve_vl_pveqv_vsvMvl #define _vel_vldz_vvl __builtin_ve_vl_vldz_vvl #define _vel_vldz_vvvl __builtin_ve_vl_vldz_vvvl #define _vel_vldz_vvmvl __builtin_ve_vl_vldz_vvmvl #define _vel_pvldzlo_vvl __builtin_ve_vl_pvldzlo_vvl #define _vel_pvldzlo_vvvl __builtin_ve_vl_pvldzlo_vvvl #define _vel_pvldzlo_vvmvl __builtin_ve_vl_pvldzlo_vvmvl #define _vel_pvldzup_vvl __builtin_ve_vl_pvldzup_vvl #define _vel_pvldzup_vvvl __builtin_ve_vl_pvldzup_vvvl #define _vel_pvldzup_vvmvl __builtin_ve_vl_pvldzup_vvmvl #define _vel_pvldz_vvl __builtin_ve_vl_pvldz_vvl #define _vel_pvldz_vvvl __builtin_ve_vl_pvldz_vvvl #define _vel_pvldz_vvMvl __builtin_ve_vl_pvldz_vvMvl #define _vel_vpcnt_vvl __builtin_ve_vl_vpcnt_vvl #define _vel_vpcnt_vvvl __builtin_ve_vl_vpcnt_vvvl #define _vel_vpcnt_vvmvl __builtin_ve_vl_vpcnt_vvmvl #define _vel_pvpcntlo_vvl __builtin_ve_vl_pvpcntlo_vvl #define _vel_pvpcntlo_vvvl __builtin_ve_vl_pvpcntlo_vvvl #define _vel_pvpcntlo_vvmvl __builtin_ve_vl_pvpcntlo_vvmvl #define _vel_pvpcntup_vvl __builtin_ve_vl_pvpcntup_vvl #define _vel_pvpcntup_vvvl __builtin_ve_vl_pvpcntup_vvvl #define _vel_pvpcntup_vvmvl __builtin_ve_vl_pvpcntup_vvmvl #define _vel_pvpcnt_vvl __builtin_ve_vl_pvpcnt_vvl #define _vel_pvpcnt_vvvl __builtin_ve_vl_pvpcnt_vvvl #define _vel_pvpcnt_vvMvl __builtin_ve_vl_pvpcnt_vvMvl #define _vel_vbrv_vvl __builtin_ve_vl_vbrv_vvl #define _vel_vbrv_vvvl __builtin_ve_vl_vbrv_vvvl #define _vel_vbrv_vvmvl __builtin_ve_vl_vbrv_vvmvl #define _vel_pvbrvlo_vvl __builtin_ve_vl_pvbrvlo_vvl 
#define _vel_pvbrvlo_vvvl __builtin_ve_vl_pvbrvlo_vvvl #define _vel_pvbrvlo_vvmvl __builtin_ve_vl_pvbrvlo_vvmvl #define _vel_pvbrvup_vvl __builtin_ve_vl_pvbrvup_vvl #define _vel_pvbrvup_vvvl __builtin_ve_vl_pvbrvup_vvvl #define _vel_pvbrvup_vvmvl __builtin_ve_vl_pvbrvup_vvmvl #define _vel_pvbrv_vvl __builtin_ve_vl_pvbrv_vvl #define _vel_pvbrv_vvvl __builtin_ve_vl_pvbrv_vvvl #define _vel_pvbrv_vvMvl __builtin_ve_vl_pvbrv_vvMvl #define _vel_vseq_vl __builtin_ve_vl_vseq_vl #define _vel_vseq_vvl __builtin_ve_vl_vseq_vvl #define _vel_pvseqlo_vl __builtin_ve_vl_pvseqlo_vl #define _vel_pvseqlo_vvl __builtin_ve_vl_pvseqlo_vvl #define _vel_pvsequp_vl __builtin_ve_vl_pvsequp_vl #define _vel_pvsequp_vvl __builtin_ve_vl_pvsequp_vvl #define _vel_pvseq_vl __builtin_ve_vl_pvseq_vl #define _vel_pvseq_vvl __builtin_ve_vl_pvseq_vvl #define _vel_vsll_vvvl __builtin_ve_vl_vsll_vvvl #define _vel_vsll_vvvvl __builtin_ve_vl_vsll_vvvvl #define _vel_vsll_vvsl __builtin_ve_vl_vsll_vvsl #define _vel_vsll_vvsvl __builtin_ve_vl_vsll_vvsvl #define _vel_vsll_vvvmvl __builtin_ve_vl_vsll_vvvmvl #define _vel_vsll_vvsmvl __builtin_ve_vl_vsll_vvsmvl #define _vel_pvsll_vvvl __builtin_ve_vl_pvsll_vvvl #define _vel_pvsll_vvvvl __builtin_ve_vl_pvsll_vvvvl #define _vel_pvsll_vvsl __builtin_ve_vl_pvsll_vvsl #define _vel_pvsll_vvsvl __builtin_ve_vl_pvsll_vvsvl #define _vel_pvsll_vvvMvl __builtin_ve_vl_pvsll_vvvMvl #define _vel_pvsll_vvsMvl __builtin_ve_vl_pvsll_vvsMvl #define _vel_vsrl_vvvl __builtin_ve_vl_vsrl_vvvl #define _vel_vsrl_vvvvl __builtin_ve_vl_vsrl_vvvvl #define _vel_vsrl_vvsl __builtin_ve_vl_vsrl_vvsl #define _vel_vsrl_vvsvl __builtin_ve_vl_vsrl_vvsvl #define _vel_vsrl_vvvmvl __builtin_ve_vl_vsrl_vvvmvl #define _vel_vsrl_vvsmvl __builtin_ve_vl_vsrl_vvsmvl #define _vel_pvsrl_vvvl __builtin_ve_vl_pvsrl_vvvl #define _vel_pvsrl_vvvvl __builtin_ve_vl_pvsrl_vvvvl #define _vel_pvsrl_vvsl __builtin_ve_vl_pvsrl_vvsl #define _vel_pvsrl_vvsvl __builtin_ve_vl_pvsrl_vvsvl #define _vel_pvsrl_vvvMvl 
__builtin_ve_vl_pvsrl_vvvMvl #define _vel_pvsrl_vvsMvl __builtin_ve_vl_pvsrl_vvsMvl #define _vel_vslawsx_vvvl __builtin_ve_vl_vslawsx_vvvl #define _vel_vslawsx_vvvvl __builtin_ve_vl_vslawsx_vvvvl #define _vel_vslawsx_vvsl __builtin_ve_vl_vslawsx_vvsl #define _vel_vslawsx_vvsvl __builtin_ve_vl_vslawsx_vvsvl #define _vel_vslawsx_vvvmvl __builtin_ve_vl_vslawsx_vvvmvl #define _vel_vslawsx_vvsmvl __builtin_ve_vl_vslawsx_vvsmvl #define _vel_vslawzx_vvvl __builtin_ve_vl_vslawzx_vvvl #define _vel_vslawzx_vvvvl __builtin_ve_vl_vslawzx_vvvvl #define _vel_vslawzx_vvsl __builtin_ve_vl_vslawzx_vvsl #define _vel_vslawzx_vvsvl __builtin_ve_vl_vslawzx_vvsvl #define _vel_vslawzx_vvvmvl __builtin_ve_vl_vslawzx_vvvmvl #define _vel_vslawzx_vvsmvl __builtin_ve_vl_vslawzx_vvsmvl #define _vel_pvsla_vvvl __builtin_ve_vl_pvsla_vvvl #define _vel_pvsla_vvvvl __builtin_ve_vl_pvsla_vvvvl #define _vel_pvsla_vvsl __builtin_ve_vl_pvsla_vvsl #define _vel_pvsla_vvsvl __builtin_ve_vl_pvsla_vvsvl #define _vel_pvsla_vvvMvl __builtin_ve_vl_pvsla_vvvMvl #define _vel_pvsla_vvsMvl __builtin_ve_vl_pvsla_vvsMvl #define _vel_vslal_vvvl __builtin_ve_vl_vslal_vvvl #define _vel_vslal_vvvvl __builtin_ve_vl_vslal_vvvvl #define _vel_vslal_vvsl __builtin_ve_vl_vslal_vvsl #define _vel_vslal_vvsvl __builtin_ve_vl_vslal_vvsvl #define _vel_vslal_vvvmvl __builtin_ve_vl_vslal_vvvmvl #define _vel_vslal_vvsmvl __builtin_ve_vl_vslal_vvsmvl #define _vel_vsrawsx_vvvl __builtin_ve_vl_vsrawsx_vvvl #define _vel_vsrawsx_vvvvl __builtin_ve_vl_vsrawsx_vvvvl #define _vel_vsrawsx_vvsl __builtin_ve_vl_vsrawsx_vvsl #define _vel_vsrawsx_vvsvl __builtin_ve_vl_vsrawsx_vvsvl #define _vel_vsrawsx_vvvmvl __builtin_ve_vl_vsrawsx_vvvmvl #define _vel_vsrawsx_vvsmvl __builtin_ve_vl_vsrawsx_vvsmvl #define _vel_vsrawzx_vvvl __builtin_ve_vl_vsrawzx_vvvl #define _vel_vsrawzx_vvvvl __builtin_ve_vl_vsrawzx_vvvvl #define _vel_vsrawzx_vvsl __builtin_ve_vl_vsrawzx_vvsl #define _vel_vsrawzx_vvsvl __builtin_ve_vl_vsrawzx_vvsvl #define _vel_vsrawzx_vvvmvl 
__builtin_ve_vl_vsrawzx_vvvmvl #define _vel_vsrawzx_vvsmvl __builtin_ve_vl_vsrawzx_vvsmvl #define _vel_pvsra_vvvl __builtin_ve_vl_pvsra_vvvl #define _vel_pvsra_vvvvl __builtin_ve_vl_pvsra_vvvvl #define _vel_pvsra_vvsl __builtin_ve_vl_pvsra_vvsl #define _vel_pvsra_vvsvl __builtin_ve_vl_pvsra_vvsvl #define _vel_pvsra_vvvMvl __builtin_ve_vl_pvsra_vvvMvl #define _vel_pvsra_vvsMvl __builtin_ve_vl_pvsra_vvsMvl #define _vel_vsral_vvvl __builtin_ve_vl_vsral_vvvl #define _vel_vsral_vvvvl __builtin_ve_vl_vsral_vvvvl #define _vel_vsral_vvsl __builtin_ve_vl_vsral_vvsl #define _vel_vsral_vvsvl __builtin_ve_vl_vsral_vvsvl #define _vel_vsral_vvvmvl __builtin_ve_vl_vsral_vvvmvl #define _vel_vsral_vvsmvl __builtin_ve_vl_vsral_vvsmvl #define _vel_vsfa_vvssl __builtin_ve_vl_vsfa_vvssl #define _vel_vsfa_vvssvl __builtin_ve_vl_vsfa_vvssvl #define _vel_vsfa_vvssmvl __builtin_ve_vl_vsfa_vvssmvl #define _vel_vfaddd_vvvl __builtin_ve_vl_vfaddd_vvvl #define _vel_vfaddd_vvvvl __builtin_ve_vl_vfaddd_vvvvl #define _vel_vfaddd_vsvl __builtin_ve_vl_vfaddd_vsvl #define _vel_vfaddd_vsvvl __builtin_ve_vl_vfaddd_vsvvl #define _vel_vfaddd_vvvmvl __builtin_ve_vl_vfaddd_vvvmvl #define _vel_vfaddd_vsvmvl __builtin_ve_vl_vfaddd_vsvmvl #define _vel_vfadds_vvvl __builtin_ve_vl_vfadds_vvvl #define _vel_vfadds_vvvvl __builtin_ve_vl_vfadds_vvvvl #define _vel_vfadds_vsvl __builtin_ve_vl_vfadds_vsvl #define _vel_vfadds_vsvvl __builtin_ve_vl_vfadds_vsvvl #define _vel_vfadds_vvvmvl __builtin_ve_vl_vfadds_vvvmvl #define _vel_vfadds_vsvmvl __builtin_ve_vl_vfadds_vsvmvl #define _vel_pvfadd_vvvl __builtin_ve_vl_pvfadd_vvvl #define _vel_pvfadd_vvvvl __builtin_ve_vl_pvfadd_vvvvl #define _vel_pvfadd_vsvl __builtin_ve_vl_pvfadd_vsvl #define _vel_pvfadd_vsvvl __builtin_ve_vl_pvfadd_vsvvl #define _vel_pvfadd_vvvMvl __builtin_ve_vl_pvfadd_vvvMvl #define _vel_pvfadd_vsvMvl __builtin_ve_vl_pvfadd_vsvMvl #define _vel_vfsubd_vvvl __builtin_ve_vl_vfsubd_vvvl #define _vel_vfsubd_vvvvl __builtin_ve_vl_vfsubd_vvvvl #define 
_vel_vfsubd_vsvl __builtin_ve_vl_vfsubd_vsvl #define _vel_vfsubd_vsvvl __builtin_ve_vl_vfsubd_vsvvl #define _vel_vfsubd_vvvmvl __builtin_ve_vl_vfsubd_vvvmvl #define _vel_vfsubd_vsvmvl __builtin_ve_vl_vfsubd_vsvmvl #define _vel_vfsubs_vvvl __builtin_ve_vl_vfsubs_vvvl #define _vel_vfsubs_vvvvl __builtin_ve_vl_vfsubs_vvvvl #define _vel_vfsubs_vsvl __builtin_ve_vl_vfsubs_vsvl #define _vel_vfsubs_vsvvl __builtin_ve_vl_vfsubs_vsvvl #define _vel_vfsubs_vvvmvl __builtin_ve_vl_vfsubs_vvvmvl #define _vel_vfsubs_vsvmvl __builtin_ve_vl_vfsubs_vsvmvl #define _vel_pvfsub_vvvl __builtin_ve_vl_pvfsub_vvvl #define _vel_pvfsub_vvvvl __builtin_ve_vl_pvfsub_vvvvl #define _vel_pvfsub_vsvl __builtin_ve_vl_pvfsub_vsvl #define _vel_pvfsub_vsvvl __builtin_ve_vl_pvfsub_vsvvl #define _vel_pvfsub_vvvMvl __builtin_ve_vl_pvfsub_vvvMvl #define _vel_pvfsub_vsvMvl __builtin_ve_vl_pvfsub_vsvMvl #define _vel_vfmuld_vvvl __builtin_ve_vl_vfmuld_vvvl #define _vel_vfmuld_vvvvl __builtin_ve_vl_vfmuld_vvvvl #define _vel_vfmuld_vsvl __builtin_ve_vl_vfmuld_vsvl #define _vel_vfmuld_vsvvl __builtin_ve_vl_vfmuld_vsvvl #define _vel_vfmuld_vvvmvl __builtin_ve_vl_vfmuld_vvvmvl #define _vel_vfmuld_vsvmvl __builtin_ve_vl_vfmuld_vsvmvl #define _vel_vfmuls_vvvl __builtin_ve_vl_vfmuls_vvvl #define _vel_vfmuls_vvvvl __builtin_ve_vl_vfmuls_vvvvl #define _vel_vfmuls_vsvl __builtin_ve_vl_vfmuls_vsvl #define _vel_vfmuls_vsvvl __builtin_ve_vl_vfmuls_vsvvl #define _vel_vfmuls_vvvmvl __builtin_ve_vl_vfmuls_vvvmvl #define _vel_vfmuls_vsvmvl __builtin_ve_vl_vfmuls_vsvmvl #define _vel_pvfmul_vvvl __builtin_ve_vl_pvfmul_vvvl #define _vel_pvfmul_vvvvl __builtin_ve_vl_pvfmul_vvvvl #define _vel_pvfmul_vsvl __builtin_ve_vl_pvfmul_vsvl #define _vel_pvfmul_vsvvl __builtin_ve_vl_pvfmul_vsvvl #define _vel_pvfmul_vvvMvl __builtin_ve_vl_pvfmul_vvvMvl #define _vel_pvfmul_vsvMvl __builtin_ve_vl_pvfmul_vsvMvl #define _vel_vfdivd_vvvl __builtin_ve_vl_vfdivd_vvvl #define _vel_vfdivd_vvvvl __builtin_ve_vl_vfdivd_vvvvl #define _vel_vfdivd_vsvl 
__builtin_ve_vl_vfdivd_vsvl #define _vel_vfdivd_vsvvl __builtin_ve_vl_vfdivd_vsvvl #define _vel_vfdivd_vvvmvl __builtin_ve_vl_vfdivd_vvvmvl #define _vel_vfdivd_vsvmvl __builtin_ve_vl_vfdivd_vsvmvl #define _vel_vfdivs_vvvl __builtin_ve_vl_vfdivs_vvvl #define _vel_vfdivs_vvvvl __builtin_ve_vl_vfdivs_vvvvl #define _vel_vfdivs_vsvl __builtin_ve_vl_vfdivs_vsvl #define _vel_vfdivs_vsvvl __builtin_ve_vl_vfdivs_vsvvl #define _vel_vfdivs_vvvmvl __builtin_ve_vl_vfdivs_vvvmvl #define _vel_vfdivs_vsvmvl __builtin_ve_vl_vfdivs_vsvmvl #define _vel_vfsqrtd_vvl __builtin_ve_vl_vfsqrtd_vvl #define _vel_vfsqrtd_vvvl __builtin_ve_vl_vfsqrtd_vvvl #define _vel_vfsqrts_vvl __builtin_ve_vl_vfsqrts_vvl #define _vel_vfsqrts_vvvl __builtin_ve_vl_vfsqrts_vvvl #define _vel_vfcmpd_vvvl __builtin_ve_vl_vfcmpd_vvvl #define _vel_vfcmpd_vvvvl __builtin_ve_vl_vfcmpd_vvvvl #define _vel_vfcmpd_vsvl __builtin_ve_vl_vfcmpd_vsvl #define _vel_vfcmpd_vsvvl __builtin_ve_vl_vfcmpd_vsvvl #define _vel_vfcmpd_vvvmvl __builtin_ve_vl_vfcmpd_vvvmvl #define _vel_vfcmpd_vsvmvl __builtin_ve_vl_vfcmpd_vsvmvl #define _vel_vfcmps_vvvl __builtin_ve_vl_vfcmps_vvvl #define _vel_vfcmps_vvvvl __builtin_ve_vl_vfcmps_vvvvl #define _vel_vfcmps_vsvl __builtin_ve_vl_vfcmps_vsvl #define _vel_vfcmps_vsvvl __builtin_ve_vl_vfcmps_vsvvl #define _vel_vfcmps_vvvmvl __builtin_ve_vl_vfcmps_vvvmvl #define _vel_vfcmps_vsvmvl __builtin_ve_vl_vfcmps_vsvmvl #define _vel_pvfcmp_vvvl __builtin_ve_vl_pvfcmp_vvvl #define _vel_pvfcmp_vvvvl __builtin_ve_vl_pvfcmp_vvvvl #define _vel_pvfcmp_vsvl __builtin_ve_vl_pvfcmp_vsvl #define _vel_pvfcmp_vsvvl __builtin_ve_vl_pvfcmp_vsvvl #define _vel_pvfcmp_vvvMvl __builtin_ve_vl_pvfcmp_vvvMvl #define _vel_pvfcmp_vsvMvl __builtin_ve_vl_pvfcmp_vsvMvl #define _vel_vfmaxd_vvvl __builtin_ve_vl_vfmaxd_vvvl #define _vel_vfmaxd_vvvvl __builtin_ve_vl_vfmaxd_vvvvl #define _vel_vfmaxd_vsvl __builtin_ve_vl_vfmaxd_vsvl #define _vel_vfmaxd_vsvvl __builtin_ve_vl_vfmaxd_vsvvl #define _vel_vfmaxd_vvvmvl 
__builtin_ve_vl_vfmaxd_vvvmvl #define _vel_vfmaxd_vsvmvl __builtin_ve_vl_vfmaxd_vsvmvl #define _vel_vfmaxs_vvvl __builtin_ve_vl_vfmaxs_vvvl #define _vel_vfmaxs_vvvvl __builtin_ve_vl_vfmaxs_vvvvl #define _vel_vfmaxs_vsvl __builtin_ve_vl_vfmaxs_vsvl #define _vel_vfmaxs_vsvvl __builtin_ve_vl_vfmaxs_vsvvl #define _vel_vfmaxs_vvvmvl __builtin_ve_vl_vfmaxs_vvvmvl #define _vel_vfmaxs_vsvmvl __builtin_ve_vl_vfmaxs_vsvmvl #define _vel_pvfmax_vvvl __builtin_ve_vl_pvfmax_vvvl #define _vel_pvfmax_vvvvl __builtin_ve_vl_pvfmax_vvvvl #define _vel_pvfmax_vsvl __builtin_ve_vl_pvfmax_vsvl #define _vel_pvfmax_vsvvl __builtin_ve_vl_pvfmax_vsvvl #define _vel_pvfmax_vvvMvl __builtin_ve_vl_pvfmax_vvvMvl #define _vel_pvfmax_vsvMvl __builtin_ve_vl_pvfmax_vsvMvl #define _vel_vfmind_vvvl __builtin_ve_vl_vfmind_vvvl #define _vel_vfmind_vvvvl __builtin_ve_vl_vfmind_vvvvl #define _vel_vfmind_vsvl __builtin_ve_vl_vfmind_vsvl #define _vel_vfmind_vsvvl __builtin_ve_vl_vfmind_vsvvl #define _vel_vfmind_vvvmvl __builtin_ve_vl_vfmind_vvvmvl #define _vel_vfmind_vsvmvl __builtin_ve_vl_vfmind_vsvmvl #define _vel_vfmins_vvvl __builtin_ve_vl_vfmins_vvvl #define _vel_vfmins_vvvvl __builtin_ve_vl_vfmins_vvvvl #define _vel_vfmins_vsvl __builtin_ve_vl_vfmins_vsvl #define _vel_vfmins_vsvvl __builtin_ve_vl_vfmins_vsvvl #define _vel_vfmins_vvvmvl __builtin_ve_vl_vfmins_vvvmvl #define _vel_vfmins_vsvmvl __builtin_ve_vl_vfmins_vsvmvl #define _vel_pvfmin_vvvl __builtin_ve_vl_pvfmin_vvvl #define _vel_pvfmin_vvvvl __builtin_ve_vl_pvfmin_vvvvl #define _vel_pvfmin_vsvl __builtin_ve_vl_pvfmin_vsvl #define _vel_pvfmin_vsvvl __builtin_ve_vl_pvfmin_vsvvl #define _vel_pvfmin_vvvMvl __builtin_ve_vl_pvfmin_vvvMvl #define _vel_pvfmin_vsvMvl __builtin_ve_vl_pvfmin_vsvMvl #define _vel_vfmadd_vvvvl __builtin_ve_vl_vfmadd_vvvvl #define _vel_vfmadd_vvvvvl __builtin_ve_vl_vfmadd_vvvvvl #define _vel_vfmadd_vsvvl __builtin_ve_vl_vfmadd_vsvvl #define _vel_vfmadd_vsvvvl __builtin_ve_vl_vfmadd_vsvvvl #define _vel_vfmadd_vvsvl 
__builtin_ve_vl_vfmadd_vvsvl #define _vel_vfmadd_vvsvvl __builtin_ve_vl_vfmadd_vvsvvl #define _vel_vfmadd_vvvvmvl __builtin_ve_vl_vfmadd_vvvvmvl #define _vel_vfmadd_vsvvmvl __builtin_ve_vl_vfmadd_vsvvmvl #define _vel_vfmadd_vvsvmvl __builtin_ve_vl_vfmadd_vvsvmvl #define _vel_vfmads_vvvvl __builtin_ve_vl_vfmads_vvvvl #define _vel_vfmads_vvvvvl __builtin_ve_vl_vfmads_vvvvvl #define _vel_vfmads_vsvvl __builtin_ve_vl_vfmads_vsvvl #define _vel_vfmads_vsvvvl __builtin_ve_vl_vfmads_vsvvvl #define _vel_vfmads_vvsvl __builtin_ve_vl_vfmads_vvsvl #define _vel_vfmads_vvsvvl __builtin_ve_vl_vfmads_vvsvvl #define _vel_vfmads_vvvvmvl __builtin_ve_vl_vfmads_vvvvmvl #define _vel_vfmads_vsvvmvl __builtin_ve_vl_vfmads_vsvvmvl #define _vel_vfmads_vvsvmvl __builtin_ve_vl_vfmads_vvsvmvl #define _vel_pvfmad_vvvvl __builtin_ve_vl_pvfmad_vvvvl #define _vel_pvfmad_vvvvvl __builtin_ve_vl_pvfmad_vvvvvl #define _vel_pvfmad_vsvvl __builtin_ve_vl_pvfmad_vsvvl #define _vel_pvfmad_vsvvvl __builtin_ve_vl_pvfmad_vsvvvl #define _vel_pvfmad_vvsvl __builtin_ve_vl_pvfmad_vvsvl #define _vel_pvfmad_vvsvvl __builtin_ve_vl_pvfmad_vvsvvl #define _vel_pvfmad_vvvvMvl __builtin_ve_vl_pvfmad_vvvvMvl #define _vel_pvfmad_vsvvMvl __builtin_ve_vl_pvfmad_vsvvMvl #define _vel_pvfmad_vvsvMvl __builtin_ve_vl_pvfmad_vvsvMvl #define _vel_vfmsbd_vvvvl __builtin_ve_vl_vfmsbd_vvvvl #define _vel_vfmsbd_vvvvvl __builtin_ve_vl_vfmsbd_vvvvvl #define _vel_vfmsbd_vsvvl __builtin_ve_vl_vfmsbd_vsvvl #define _vel_vfmsbd_vsvvvl __builtin_ve_vl_vfmsbd_vsvvvl #define _vel_vfmsbd_vvsvl __builtin_ve_vl_vfmsbd_vvsvl #define _vel_vfmsbd_vvsvvl __builtin_ve_vl_vfmsbd_vvsvvl #define _vel_vfmsbd_vvvvmvl __builtin_ve_vl_vfmsbd_vvvvmvl #define _vel_vfmsbd_vsvvmvl __builtin_ve_vl_vfmsbd_vsvvmvl #define _vel_vfmsbd_vvsvmvl __builtin_ve_vl_vfmsbd_vvsvmvl #define _vel_vfmsbs_vvvvl __builtin_ve_vl_vfmsbs_vvvvl #define _vel_vfmsbs_vvvvvl __builtin_ve_vl_vfmsbs_vvvvvl #define _vel_vfmsbs_vsvvl __builtin_ve_vl_vfmsbs_vsvvl #define _vel_vfmsbs_vsvvvl 
__builtin_ve_vl_vfmsbs_vsvvvl #define _vel_vfmsbs_vvsvl __builtin_ve_vl_vfmsbs_vvsvl #define _vel_vfmsbs_vvsvvl __builtin_ve_vl_vfmsbs_vvsvvl #define _vel_vfmsbs_vvvvmvl __builtin_ve_vl_vfmsbs_vvvvmvl #define _vel_vfmsbs_vsvvmvl __builtin_ve_vl_vfmsbs_vsvvmvl #define _vel_vfmsbs_vvsvmvl __builtin_ve_vl_vfmsbs_vvsvmvl #define _vel_pvfmsb_vvvvl __builtin_ve_vl_pvfmsb_vvvvl #define _vel_pvfmsb_vvvvvl __builtin_ve_vl_pvfmsb_vvvvvl #define _vel_pvfmsb_vsvvl __builtin_ve_vl_pvfmsb_vsvvl #define _vel_pvfmsb_vsvvvl __builtin_ve_vl_pvfmsb_vsvvvl #define _vel_pvfmsb_vvsvl __builtin_ve_vl_pvfmsb_vvsvl #define _vel_pvfmsb_vvsvvl __builtin_ve_vl_pvfmsb_vvsvvl #define _vel_pvfmsb_vvvvMvl __builtin_ve_vl_pvfmsb_vvvvMvl #define _vel_pvfmsb_vsvvMvl __builtin_ve_vl_pvfmsb_vsvvMvl #define _vel_pvfmsb_vvsvMvl __builtin_ve_vl_pvfmsb_vvsvMvl #define _vel_vfnmadd_vvvvl __builtin_ve_vl_vfnmadd_vvvvl #define _vel_vfnmadd_vvvvvl __builtin_ve_vl_vfnmadd_vvvvvl #define _vel_vfnmadd_vsvvl __builtin_ve_vl_vfnmadd_vsvvl #define _vel_vfnmadd_vsvvvl __builtin_ve_vl_vfnmadd_vsvvvl #define _vel_vfnmadd_vvsvl __builtin_ve_vl_vfnmadd_vvsvl #define _vel_vfnmadd_vvsvvl __builtin_ve_vl_vfnmadd_vvsvvl #define _vel_vfnmadd_vvvvmvl __builtin_ve_vl_vfnmadd_vvvvmvl #define _vel_vfnmadd_vsvvmvl __builtin_ve_vl_vfnmadd_vsvvmvl #define _vel_vfnmadd_vvsvmvl __builtin_ve_vl_vfnmadd_vvsvmvl #define _vel_vfnmads_vvvvl __builtin_ve_vl_vfnmads_vvvvl #define _vel_vfnmads_vvvvvl __builtin_ve_vl_vfnmads_vvvvvl #define _vel_vfnmads_vsvvl __builtin_ve_vl_vfnmads_vsvvl #define _vel_vfnmads_vsvvvl __builtin_ve_vl_vfnmads_vsvvvl #define _vel_vfnmads_vvsvl __builtin_ve_vl_vfnmads_vvsvl #define _vel_vfnmads_vvsvvl __builtin_ve_vl_vfnmads_vvsvvl #define _vel_vfnmads_vvvvmvl __builtin_ve_vl_vfnmads_vvvvmvl #define _vel_vfnmads_vsvvmvl __builtin_ve_vl_vfnmads_vsvvmvl #define _vel_vfnmads_vvsvmvl __builtin_ve_vl_vfnmads_vvsvmvl #define _vel_pvfnmad_vvvvl __builtin_ve_vl_pvfnmad_vvvvl #define _vel_pvfnmad_vvvvvl 
__builtin_ve_vl_pvfnmad_vvvvvl #define _vel_pvfnmad_vsvvl __builtin_ve_vl_pvfnmad_vsvvl #define _vel_pvfnmad_vsvvvl __builtin_ve_vl_pvfnmad_vsvvvl #define _vel_pvfnmad_vvsvl __builtin_ve_vl_pvfnmad_vvsvl #define _vel_pvfnmad_vvsvvl __builtin_ve_vl_pvfnmad_vvsvvl #define _vel_pvfnmad_vvvvMvl __builtin_ve_vl_pvfnmad_vvvvMvl #define _vel_pvfnmad_vsvvMvl __builtin_ve_vl_pvfnmad_vsvvMvl #define _vel_pvfnmad_vvsvMvl __builtin_ve_vl_pvfnmad_vvsvMvl #define _vel_vfnmsbd_vvvvl __builtin_ve_vl_vfnmsbd_vvvvl #define _vel_vfnmsbd_vvvvvl __builtin_ve_vl_vfnmsbd_vvvvvl #define _vel_vfnmsbd_vsvvl __builtin_ve_vl_vfnmsbd_vsvvl #define _vel_vfnmsbd_vsvvvl __builtin_ve_vl_vfnmsbd_vsvvvl #define _vel_vfnmsbd_vvsvl __builtin_ve_vl_vfnmsbd_vvsvl #define _vel_vfnmsbd_vvsvvl __builtin_ve_vl_vfnmsbd_vvsvvl #define _vel_vfnmsbd_vvvvmvl __builtin_ve_vl_vfnmsbd_vvvvmvl #define _vel_vfnmsbd_vsvvmvl __builtin_ve_vl_vfnmsbd_vsvvmvl #define _vel_vfnmsbd_vvsvmvl __builtin_ve_vl_vfnmsbd_vvsvmvl #define _vel_vfnmsbs_vvvvl __builtin_ve_vl_vfnmsbs_vvvvl #define _vel_vfnmsbs_vvvvvl __builtin_ve_vl_vfnmsbs_vvvvvl #define _vel_vfnmsbs_vsvvl __builtin_ve_vl_vfnmsbs_vsvvl #define _vel_vfnmsbs_vsvvvl __builtin_ve_vl_vfnmsbs_vsvvvl #define _vel_vfnmsbs_vvsvl __builtin_ve_vl_vfnmsbs_vvsvl #define _vel_vfnmsbs_vvsvvl __builtin_ve_vl_vfnmsbs_vvsvvl #define _vel_vfnmsbs_vvvvmvl __builtin_ve_vl_vfnmsbs_vvvvmvl #define _vel_vfnmsbs_vsvvmvl __builtin_ve_vl_vfnmsbs_vsvvmvl #define _vel_vfnmsbs_vvsvmvl __builtin_ve_vl_vfnmsbs_vvsvmvl #define _vel_pvfnmsb_vvvvl __builtin_ve_vl_pvfnmsb_vvvvl #define _vel_pvfnmsb_vvvvvl __builtin_ve_vl_pvfnmsb_vvvvvl #define _vel_pvfnmsb_vsvvl __builtin_ve_vl_pvfnmsb_vsvvl #define _vel_pvfnmsb_vsvvvl __builtin_ve_vl_pvfnmsb_vsvvvl #define _vel_pvfnmsb_vvsvl __builtin_ve_vl_pvfnmsb_vvsvl #define _vel_pvfnmsb_vvsvvl __builtin_ve_vl_pvfnmsb_vvsvvl #define _vel_pvfnmsb_vvvvMvl __builtin_ve_vl_pvfnmsb_vvvvMvl #define _vel_pvfnmsb_vsvvMvl __builtin_ve_vl_pvfnmsb_vsvvMvl #define 
_vel_pvfnmsb_vvsvMvl __builtin_ve_vl_pvfnmsb_vvsvMvl #define _vel_vrcpd_vvl __builtin_ve_vl_vrcpd_vvl #define _vel_vrcpd_vvvl __builtin_ve_vl_vrcpd_vvvl #define _vel_vrcps_vvl __builtin_ve_vl_vrcps_vvl #define _vel_vrcps_vvvl __builtin_ve_vl_vrcps_vvvl #define _vel_pvrcp_vvl __builtin_ve_vl_pvrcp_vvl #define _vel_pvrcp_vvvl __builtin_ve_vl_pvrcp_vvvl #define _vel_vrsqrtd_vvl __builtin_ve_vl_vrsqrtd_vvl #define _vel_vrsqrtd_vvvl __builtin_ve_vl_vrsqrtd_vvvl #define _vel_vrsqrts_vvl __builtin_ve_vl_vrsqrts_vvl #define _vel_vrsqrts_vvvl __builtin_ve_vl_vrsqrts_vvvl #define _vel_pvrsqrt_vvl __builtin_ve_vl_pvrsqrt_vvl #define _vel_pvrsqrt_vvvl __builtin_ve_vl_pvrsqrt_vvvl #define _vel_vrsqrtdnex_vvl __builtin_ve_vl_vrsqrtdnex_vvl #define _vel_vrsqrtdnex_vvvl __builtin_ve_vl_vrsqrtdnex_vvvl #define _vel_vrsqrtsnex_vvl __builtin_ve_vl_vrsqrtsnex_vvl #define _vel_vrsqrtsnex_vvvl __builtin_ve_vl_vrsqrtsnex_vvvl #define _vel_pvrsqrtnex_vvl __builtin_ve_vl_pvrsqrtnex_vvl #define _vel_pvrsqrtnex_vvvl __builtin_ve_vl_pvrsqrtnex_vvvl #define _vel_vcvtwdsx_vvl __builtin_ve_vl_vcvtwdsx_vvl #define _vel_vcvtwdsx_vvvl __builtin_ve_vl_vcvtwdsx_vvvl #define _vel_vcvtwdsx_vvmvl __builtin_ve_vl_vcvtwdsx_vvmvl #define _vel_vcvtwdsxrz_vvl __builtin_ve_vl_vcvtwdsxrz_vvl #define _vel_vcvtwdsxrz_vvvl __builtin_ve_vl_vcvtwdsxrz_vvvl #define _vel_vcvtwdsxrz_vvmvl __builtin_ve_vl_vcvtwdsxrz_vvmvl #define _vel_vcvtwdzx_vvl __builtin_ve_vl_vcvtwdzx_vvl #define _vel_vcvtwdzx_vvvl __builtin_ve_vl_vcvtwdzx_vvvl #define _vel_vcvtwdzx_vvmvl __builtin_ve_vl_vcvtwdzx_vvmvl #define _vel_vcvtwdzxrz_vvl __builtin_ve_vl_vcvtwdzxrz_vvl #define _vel_vcvtwdzxrz_vvvl __builtin_ve_vl_vcvtwdzxrz_vvvl #define _vel_vcvtwdzxrz_vvmvl __builtin_ve_vl_vcvtwdzxrz_vvmvl #define _vel_vcvtwssx_vvl __builtin_ve_vl_vcvtwssx_vvl #define _vel_vcvtwssx_vvvl __builtin_ve_vl_vcvtwssx_vvvl #define _vel_vcvtwssx_vvmvl __builtin_ve_vl_vcvtwssx_vvmvl #define _vel_vcvtwssxrz_vvl __builtin_ve_vl_vcvtwssxrz_vvl #define 
_vel_vcvtwssxrz_vvvl __builtin_ve_vl_vcvtwssxrz_vvvl #define _vel_vcvtwssxrz_vvmvl __builtin_ve_vl_vcvtwssxrz_vvmvl #define _vel_vcvtwszx_vvl __builtin_ve_vl_vcvtwszx_vvl #define _vel_vcvtwszx_vvvl __builtin_ve_vl_vcvtwszx_vvvl #define _vel_vcvtwszx_vvmvl __builtin_ve_vl_vcvtwszx_vvmvl #define _vel_vcvtwszxrz_vvl __builtin_ve_vl_vcvtwszxrz_vvl #define _vel_vcvtwszxrz_vvvl __builtin_ve_vl_vcvtwszxrz_vvvl #define _vel_vcvtwszxrz_vvmvl __builtin_ve_vl_vcvtwszxrz_vvmvl #define _vel_pvcvtws_vvl __builtin_ve_vl_pvcvtws_vvl #define _vel_pvcvtws_vvvl __builtin_ve_vl_pvcvtws_vvvl #define _vel_pvcvtws_vvMvl __builtin_ve_vl_pvcvtws_vvMvl #define _vel_pvcvtwsrz_vvl __builtin_ve_vl_pvcvtwsrz_vvl #define _vel_pvcvtwsrz_vvvl __builtin_ve_vl_pvcvtwsrz_vvvl #define _vel_pvcvtwsrz_vvMvl __builtin_ve_vl_pvcvtwsrz_vvMvl #define _vel_vcvtld_vvl __builtin_ve_vl_vcvtld_vvl #define _vel_vcvtld_vvvl __builtin_ve_vl_vcvtld_vvvl #define _vel_vcvtld_vvmvl __builtin_ve_vl_vcvtld_vvmvl #define _vel_vcvtldrz_vvl __builtin_ve_vl_vcvtldrz_vvl #define _vel_vcvtldrz_vvvl __builtin_ve_vl_vcvtldrz_vvvl #define _vel_vcvtldrz_vvmvl __builtin_ve_vl_vcvtldrz_vvmvl #define _vel_vcvtdw_vvl __builtin_ve_vl_vcvtdw_vvl #define _vel_vcvtdw_vvvl __builtin_ve_vl_vcvtdw_vvvl #define _vel_vcvtsw_vvl __builtin_ve_vl_vcvtsw_vvl #define _vel_vcvtsw_vvvl __builtin_ve_vl_vcvtsw_vvvl #define _vel_pvcvtsw_vvl __builtin_ve_vl_pvcvtsw_vvl #define _vel_pvcvtsw_vvvl __builtin_ve_vl_pvcvtsw_vvvl #define _vel_vcvtdl_vvl __builtin_ve_vl_vcvtdl_vvl #define _vel_vcvtdl_vvvl __builtin_ve_vl_vcvtdl_vvvl #define _vel_vcvtds_vvl __builtin_ve_vl_vcvtds_vvl #define _vel_vcvtds_vvvl __builtin_ve_vl_vcvtds_vvvl #define _vel_vcvtsd_vvl __builtin_ve_vl_vcvtsd_vvl #define _vel_vcvtsd_vvvl __builtin_ve_vl_vcvtsd_vvvl #define _vel_vmrg_vvvml __builtin_ve_vl_vmrg_vvvml #define _vel_vmrg_vvvmvl __builtin_ve_vl_vmrg_vvvmvl #define _vel_vmrg_vsvml __builtin_ve_vl_vmrg_vsvml #define _vel_vmrg_vsvmvl __builtin_ve_vl_vmrg_vsvmvl #define 
_vel_vmrgw_vvvMl __builtin_ve_vl_vmrgw_vvvMl #define _vel_vmrgw_vvvMvl __builtin_ve_vl_vmrgw_vvvMvl #define _vel_vmrgw_vsvMl __builtin_ve_vl_vmrgw_vsvMl #define _vel_vmrgw_vsvMvl __builtin_ve_vl_vmrgw_vsvMvl #define _vel_vshf_vvvsl __builtin_ve_vl_vshf_vvvsl #define _vel_vshf_vvvsvl __builtin_ve_vl_vshf_vvvsvl #define _vel_vcp_vvmvl __builtin_ve_vl_vcp_vvmvl #define _vel_vex_vvmvl __builtin_ve_vl_vex_vvmvl #define _vel_vfmklat_ml __builtin_ve_vl_vfmklat_ml #define _vel_vfmklaf_ml __builtin_ve_vl_vfmklaf_ml #define _vel_pvfmkat_Ml __builtin_ve_vl_pvfmkat_Ml #define _vel_pvfmkaf_Ml __builtin_ve_vl_pvfmkaf_Ml #define _vel_vfmklgt_mvl __builtin_ve_vl_vfmklgt_mvl #define _vel_vfmklgt_mvml __builtin_ve_vl_vfmklgt_mvml #define _vel_vfmkllt_mvl __builtin_ve_vl_vfmkllt_mvl #define _vel_vfmkllt_mvml __builtin_ve_vl_vfmkllt_mvml #define _vel_vfmklne_mvl __builtin_ve_vl_vfmklne_mvl #define _vel_vfmklne_mvml __builtin_ve_vl_vfmklne_mvml #define _vel_vfmkleq_mvl __builtin_ve_vl_vfmkleq_mvl #define _vel_vfmkleq_mvml __builtin_ve_vl_vfmkleq_mvml #define _vel_vfmklge_mvl __builtin_ve_vl_vfmklge_mvl #define _vel_vfmklge_mvml __builtin_ve_vl_vfmklge_mvml #define _vel_vfmklle_mvl __builtin_ve_vl_vfmklle_mvl #define _vel_vfmklle_mvml __builtin_ve_vl_vfmklle_mvml #define _vel_vfmklnum_mvl __builtin_ve_vl_vfmklnum_mvl #define _vel_vfmklnum_mvml __builtin_ve_vl_vfmklnum_mvml #define _vel_vfmklnan_mvl __builtin_ve_vl_vfmklnan_mvl #define _vel_vfmklnan_mvml __builtin_ve_vl_vfmklnan_mvml #define _vel_vfmklgtnan_mvl __builtin_ve_vl_vfmklgtnan_mvl #define _vel_vfmklgtnan_mvml __builtin_ve_vl_vfmklgtnan_mvml #define _vel_vfmklltnan_mvl __builtin_ve_vl_vfmklltnan_mvl #define _vel_vfmklltnan_mvml __builtin_ve_vl_vfmklltnan_mvml #define _vel_vfmklnenan_mvl __builtin_ve_vl_vfmklnenan_mvl #define _vel_vfmklnenan_mvml __builtin_ve_vl_vfmklnenan_mvml #define _vel_vfmkleqnan_mvl __builtin_ve_vl_vfmkleqnan_mvl #define _vel_vfmkleqnan_mvml __builtin_ve_vl_vfmkleqnan_mvml #define _vel_vfmklgenan_mvl 
__builtin_ve_vl_vfmklgenan_mvl #define _vel_vfmklgenan_mvml __builtin_ve_vl_vfmklgenan_mvml #define _vel_vfmkllenan_mvl __builtin_ve_vl_vfmkllenan_mvl #define _vel_vfmkllenan_mvml __builtin_ve_vl_vfmkllenan_mvml #define _vel_vfmkwgt_mvl __builtin_ve_vl_vfmkwgt_mvl #define _vel_vfmkwgt_mvml __builtin_ve_vl_vfmkwgt_mvml #define _vel_vfmkwlt_mvl __builtin_ve_vl_vfmkwlt_mvl #define _vel_vfmkwlt_mvml __builtin_ve_vl_vfmkwlt_mvml #define _vel_vfmkwne_mvl __builtin_ve_vl_vfmkwne_mvl #define _vel_vfmkwne_mvml __builtin_ve_vl_vfmkwne_mvml #define _vel_vfmkweq_mvl __builtin_ve_vl_vfmkweq_mvl #define _vel_vfmkweq_mvml __builtin_ve_vl_vfmkweq_mvml #define _vel_vfmkwge_mvl __builtin_ve_vl_vfmkwge_mvl #define _vel_vfmkwge_mvml __builtin_ve_vl_vfmkwge_mvml #define _vel_vfmkwle_mvl __builtin_ve_vl_vfmkwle_mvl #define _vel_vfmkwle_mvml __builtin_ve_vl_vfmkwle_mvml #define _vel_vfmkwnum_mvl __builtin_ve_vl_vfmkwnum_mvl #define _vel_vfmkwnum_mvml __builtin_ve_vl_vfmkwnum_mvml #define _vel_vfmkwnan_mvl __builtin_ve_vl_vfmkwnan_mvl #define _vel_vfmkwnan_mvml __builtin_ve_vl_vfmkwnan_mvml #define _vel_vfmkwgtnan_mvl __builtin_ve_vl_vfmkwgtnan_mvl #define _vel_vfmkwgtnan_mvml __builtin_ve_vl_vfmkwgtnan_mvml #define _vel_vfmkwltnan_mvl __builtin_ve_vl_vfmkwltnan_mvl #define _vel_vfmkwltnan_mvml __builtin_ve_vl_vfmkwltnan_mvml #define _vel_vfmkwnenan_mvl __builtin_ve_vl_vfmkwnenan_mvl #define _vel_vfmkwnenan_mvml __builtin_ve_vl_vfmkwnenan_mvml #define _vel_vfmkweqnan_mvl __builtin_ve_vl_vfmkweqnan_mvl #define _vel_vfmkweqnan_mvml __builtin_ve_vl_vfmkweqnan_mvml #define _vel_vfmkwgenan_mvl __builtin_ve_vl_vfmkwgenan_mvl #define _vel_vfmkwgenan_mvml __builtin_ve_vl_vfmkwgenan_mvml #define _vel_vfmkwlenan_mvl __builtin_ve_vl_vfmkwlenan_mvl #define _vel_vfmkwlenan_mvml __builtin_ve_vl_vfmkwlenan_mvml #define _vel_pvfmkwlogt_mvl __builtin_ve_vl_pvfmkwlogt_mvl #define _vel_pvfmkwupgt_mvl __builtin_ve_vl_pvfmkwupgt_mvl #define _vel_pvfmkwlogt_mvml __builtin_ve_vl_pvfmkwlogt_mvml #define 
_vel_pvfmkwupgt_mvml __builtin_ve_vl_pvfmkwupgt_mvml #define _vel_pvfmkwlolt_mvl __builtin_ve_vl_pvfmkwlolt_mvl #define _vel_pvfmkwuplt_mvl __builtin_ve_vl_pvfmkwuplt_mvl #define _vel_pvfmkwlolt_mvml __builtin_ve_vl_pvfmkwlolt_mvml #define _vel_pvfmkwuplt_mvml __builtin_ve_vl_pvfmkwuplt_mvml #define _vel_pvfmkwlone_mvl __builtin_ve_vl_pvfmkwlone_mvl #define _vel_pvfmkwupne_mvl __builtin_ve_vl_pvfmkwupne_mvl #define _vel_pvfmkwlone_mvml __builtin_ve_vl_pvfmkwlone_mvml #define _vel_pvfmkwupne_mvml __builtin_ve_vl_pvfmkwupne_mvml #define _vel_pvfmkwloeq_mvl __builtin_ve_vl_pvfmkwloeq_mvl #define _vel_pvfmkwupeq_mvl __builtin_ve_vl_pvfmkwupeq_mvl #define _vel_pvfmkwloeq_mvml __builtin_ve_vl_pvfmkwloeq_mvml #define _vel_pvfmkwupeq_mvml __builtin_ve_vl_pvfmkwupeq_mvml #define _vel_pvfmkwloge_mvl __builtin_ve_vl_pvfmkwloge_mvl #define _vel_pvfmkwupge_mvl __builtin_ve_vl_pvfmkwupge_mvl #define _vel_pvfmkwloge_mvml __builtin_ve_vl_pvfmkwloge_mvml #define _vel_pvfmkwupge_mvml __builtin_ve_vl_pvfmkwupge_mvml #define _vel_pvfmkwlole_mvl __builtin_ve_vl_pvfmkwlole_mvl #define _vel_pvfmkwuple_mvl __builtin_ve_vl_pvfmkwuple_mvl #define _vel_pvfmkwlole_mvml __builtin_ve_vl_pvfmkwlole_mvml #define _vel_pvfmkwuple_mvml __builtin_ve_vl_pvfmkwuple_mvml #define _vel_pvfmkwlonum_mvl __builtin_ve_vl_pvfmkwlonum_mvl #define _vel_pvfmkwupnum_mvl __builtin_ve_vl_pvfmkwupnum_mvl #define _vel_pvfmkwlonum_mvml __builtin_ve_vl_pvfmkwlonum_mvml #define _vel_pvfmkwupnum_mvml __builtin_ve_vl_pvfmkwupnum_mvml #define _vel_pvfmkwlonan_mvl __builtin_ve_vl_pvfmkwlonan_mvl #define _vel_pvfmkwupnan_mvl __builtin_ve_vl_pvfmkwupnan_mvl #define _vel_pvfmkwlonan_mvml __builtin_ve_vl_pvfmkwlonan_mvml #define _vel_pvfmkwupnan_mvml __builtin_ve_vl_pvfmkwupnan_mvml #define _vel_pvfmkwlogtnan_mvl __builtin_ve_vl_pvfmkwlogtnan_mvl #define _vel_pvfmkwupgtnan_mvl __builtin_ve_vl_pvfmkwupgtnan_mvl #define _vel_pvfmkwlogtnan_mvml __builtin_ve_vl_pvfmkwlogtnan_mvml #define _vel_pvfmkwupgtnan_mvml 
__builtin_ve_vl_pvfmkwupgtnan_mvml #define _vel_pvfmkwloltnan_mvl __builtin_ve_vl_pvfmkwloltnan_mvl #define _vel_pvfmkwupltnan_mvl __builtin_ve_vl_pvfmkwupltnan_mvl #define _vel_pvfmkwloltnan_mvml __builtin_ve_vl_pvfmkwloltnan_mvml #define _vel_pvfmkwupltnan_mvml __builtin_ve_vl_pvfmkwupltnan_mvml #define _vel_pvfmkwlonenan_mvl __builtin_ve_vl_pvfmkwlonenan_mvl #define _vel_pvfmkwupnenan_mvl __builtin_ve_vl_pvfmkwupnenan_mvl #define _vel_pvfmkwlonenan_mvml __builtin_ve_vl_pvfmkwlonenan_mvml #define _vel_pvfmkwupnenan_mvml __builtin_ve_vl_pvfmkwupnenan_mvml #define _vel_pvfmkwloeqnan_mvl __builtin_ve_vl_pvfmkwloeqnan_mvl #define _vel_pvfmkwupeqnan_mvl __builtin_ve_vl_pvfmkwupeqnan_mvl #define _vel_pvfmkwloeqnan_mvml __builtin_ve_vl_pvfmkwloeqnan_mvml #define _vel_pvfmkwupeqnan_mvml __builtin_ve_vl_pvfmkwupeqnan_mvml #define _vel_pvfmkwlogenan_mvl __builtin_ve_vl_pvfmkwlogenan_mvl #define _vel_pvfmkwupgenan_mvl __builtin_ve_vl_pvfmkwupgenan_mvl #define _vel_pvfmkwlogenan_mvml __builtin_ve_vl_pvfmkwlogenan_mvml #define _vel_pvfmkwupgenan_mvml __builtin_ve_vl_pvfmkwupgenan_mvml #define _vel_pvfmkwlolenan_mvl __builtin_ve_vl_pvfmkwlolenan_mvl #define _vel_pvfmkwuplenan_mvl __builtin_ve_vl_pvfmkwuplenan_mvl #define _vel_pvfmkwlolenan_mvml __builtin_ve_vl_pvfmkwlolenan_mvml #define _vel_pvfmkwuplenan_mvml __builtin_ve_vl_pvfmkwuplenan_mvml #define _vel_pvfmkwgt_Mvl __builtin_ve_vl_pvfmkwgt_Mvl #define _vel_pvfmkwgt_MvMl __builtin_ve_vl_pvfmkwgt_MvMl #define _vel_pvfmkwlt_Mvl __builtin_ve_vl_pvfmkwlt_Mvl #define _vel_pvfmkwlt_MvMl __builtin_ve_vl_pvfmkwlt_MvMl #define _vel_pvfmkwne_Mvl __builtin_ve_vl_pvfmkwne_Mvl #define _vel_pvfmkwne_MvMl __builtin_ve_vl_pvfmkwne_MvMl #define _vel_pvfmkweq_Mvl __builtin_ve_vl_pvfmkweq_Mvl #define _vel_pvfmkweq_MvMl __builtin_ve_vl_pvfmkweq_MvMl #define _vel_pvfmkwge_Mvl __builtin_ve_vl_pvfmkwge_Mvl #define _vel_pvfmkwge_MvMl __builtin_ve_vl_pvfmkwge_MvMl #define _vel_pvfmkwle_Mvl __builtin_ve_vl_pvfmkwle_Mvl #define _vel_pvfmkwle_MvMl 
__builtin_ve_vl_pvfmkwle_MvMl #define _vel_pvfmkwnum_Mvl __builtin_ve_vl_pvfmkwnum_Mvl #define _vel_pvfmkwnum_MvMl __builtin_ve_vl_pvfmkwnum_MvMl #define _vel_pvfmkwnan_Mvl __builtin_ve_vl_pvfmkwnan_Mvl #define _vel_pvfmkwnan_MvMl __builtin_ve_vl_pvfmkwnan_MvMl #define _vel_pvfmkwgtnan_Mvl __builtin_ve_vl_pvfmkwgtnan_Mvl #define _vel_pvfmkwgtnan_MvMl __builtin_ve_vl_pvfmkwgtnan_MvMl #define _vel_pvfmkwltnan_Mvl __builtin_ve_vl_pvfmkwltnan_Mvl #define _vel_pvfmkwltnan_MvMl __builtin_ve_vl_pvfmkwltnan_MvMl #define _vel_pvfmkwnenan_Mvl __builtin_ve_vl_pvfmkwnenan_Mvl #define _vel_pvfmkwnenan_MvMl __builtin_ve_vl_pvfmkwnenan_MvMl #define _vel_pvfmkweqnan_Mvl __builtin_ve_vl_pvfmkweqnan_Mvl #define _vel_pvfmkweqnan_MvMl __builtin_ve_vl_pvfmkweqnan_MvMl #define _vel_pvfmkwgenan_Mvl __builtin_ve_vl_pvfmkwgenan_Mvl #define _vel_pvfmkwgenan_MvMl __builtin_ve_vl_pvfmkwgenan_MvMl #define _vel_pvfmkwlenan_Mvl __builtin_ve_vl_pvfmkwlenan_Mvl #define _vel_pvfmkwlenan_MvMl __builtin_ve_vl_pvfmkwlenan_MvMl #define _vel_vfmkdgt_mvl __builtin_ve_vl_vfmkdgt_mvl #define _vel_vfmkdgt_mvml __builtin_ve_vl_vfmkdgt_mvml #define _vel_vfmkdlt_mvl __builtin_ve_vl_vfmkdlt_mvl #define _vel_vfmkdlt_mvml __builtin_ve_vl_vfmkdlt_mvml #define _vel_vfmkdne_mvl __builtin_ve_vl_vfmkdne_mvl #define _vel_vfmkdne_mvml __builtin_ve_vl_vfmkdne_mvml #define _vel_vfmkdeq_mvl __builtin_ve_vl_vfmkdeq_mvl #define _vel_vfmkdeq_mvml __builtin_ve_vl_vfmkdeq_mvml #define _vel_vfmkdge_mvl __builtin_ve_vl_vfmkdge_mvl #define _vel_vfmkdge_mvml __builtin_ve_vl_vfmkdge_mvml #define _vel_vfmkdle_mvl __builtin_ve_vl_vfmkdle_mvl #define _vel_vfmkdle_mvml __builtin_ve_vl_vfmkdle_mvml #define _vel_vfmkdnum_mvl __builtin_ve_vl_vfmkdnum_mvl #define _vel_vfmkdnum_mvml __builtin_ve_vl_vfmkdnum_mvml #define _vel_vfmkdnan_mvl __builtin_ve_vl_vfmkdnan_mvl #define _vel_vfmkdnan_mvml __builtin_ve_vl_vfmkdnan_mvml #define _vel_vfmkdgtnan_mvl __builtin_ve_vl_vfmkdgtnan_mvl #define _vel_vfmkdgtnan_mvml __builtin_ve_vl_vfmkdgtnan_mvml 
#define _vel_vfmkdltnan_mvl __builtin_ve_vl_vfmkdltnan_mvl #define _vel_vfmkdltnan_mvml __builtin_ve_vl_vfmkdltnan_mvml #define _vel_vfmkdnenan_mvl __builtin_ve_vl_vfmkdnenan_mvl #define _vel_vfmkdnenan_mvml __builtin_ve_vl_vfmkdnenan_mvml #define _vel_vfmkdeqnan_mvl __builtin_ve_vl_vfmkdeqnan_mvl #define _vel_vfmkdeqnan_mvml __builtin_ve_vl_vfmkdeqnan_mvml #define _vel_vfmkdgenan_mvl __builtin_ve_vl_vfmkdgenan_mvl #define _vel_vfmkdgenan_mvml __builtin_ve_vl_vfmkdgenan_mvml #define _vel_vfmkdlenan_mvl __builtin_ve_vl_vfmkdlenan_mvl #define _vel_vfmkdlenan_mvml __builtin_ve_vl_vfmkdlenan_mvml #define _vel_vfmksgt_mvl __builtin_ve_vl_vfmksgt_mvl #define _vel_vfmksgt_mvml __builtin_ve_vl_vfmksgt_mvml #define _vel_vfmkslt_mvl __builtin_ve_vl_vfmkslt_mvl #define _vel_vfmkslt_mvml __builtin_ve_vl_vfmkslt_mvml #define _vel_vfmksne_mvl __builtin_ve_vl_vfmksne_mvl #define _vel_vfmksne_mvml __builtin_ve_vl_vfmksne_mvml #define _vel_vfmkseq_mvl __builtin_ve_vl_vfmkseq_mvl #define _vel_vfmkseq_mvml __builtin_ve_vl_vfmkseq_mvml #define _vel_vfmksge_mvl __builtin_ve_vl_vfmksge_mvl #define _vel_vfmksge_mvml __builtin_ve_vl_vfmksge_mvml #define _vel_vfmksle_mvl __builtin_ve_vl_vfmksle_mvl #define _vel_vfmksle_mvml __builtin_ve_vl_vfmksle_mvml #define _vel_vfmksnum_mvl __builtin_ve_vl_vfmksnum_mvl #define _vel_vfmksnum_mvml __builtin_ve_vl_vfmksnum_mvml #define _vel_vfmksnan_mvl __builtin_ve_vl_vfmksnan_mvl #define _vel_vfmksnan_mvml __builtin_ve_vl_vfmksnan_mvml #define _vel_vfmksgtnan_mvl __builtin_ve_vl_vfmksgtnan_mvl #define _vel_vfmksgtnan_mvml __builtin_ve_vl_vfmksgtnan_mvml #define _vel_vfmksltnan_mvl __builtin_ve_vl_vfmksltnan_mvl #define _vel_vfmksltnan_mvml __builtin_ve_vl_vfmksltnan_mvml #define _vel_vfmksnenan_mvl __builtin_ve_vl_vfmksnenan_mvl #define _vel_vfmksnenan_mvml __builtin_ve_vl_vfmksnenan_mvml #define _vel_vfmkseqnan_mvl __builtin_ve_vl_vfmkseqnan_mvl #define _vel_vfmkseqnan_mvml __builtin_ve_vl_vfmkseqnan_mvml #define _vel_vfmksgenan_mvl 
__builtin_ve_vl_vfmksgenan_mvl #define _vel_vfmksgenan_mvml __builtin_ve_vl_vfmksgenan_mvml #define _vel_vfmkslenan_mvl __builtin_ve_vl_vfmkslenan_mvl #define _vel_vfmkslenan_mvml __builtin_ve_vl_vfmkslenan_mvml #define _vel_pvfmkslogt_mvl __builtin_ve_vl_pvfmkslogt_mvl #define _vel_pvfmksupgt_mvl __builtin_ve_vl_pvfmksupgt_mvl #define _vel_pvfmkslogt_mvml __builtin_ve_vl_pvfmkslogt_mvml #define _vel_pvfmksupgt_mvml __builtin_ve_vl_pvfmksupgt_mvml #define _vel_pvfmkslolt_mvl __builtin_ve_vl_pvfmkslolt_mvl #define _vel_pvfmksuplt_mvl __builtin_ve_vl_pvfmksuplt_mvl #define _vel_pvfmkslolt_mvml __builtin_ve_vl_pvfmkslolt_mvml #define _vel_pvfmksuplt_mvml __builtin_ve_vl_pvfmksuplt_mvml #define _vel_pvfmkslone_mvl __builtin_ve_vl_pvfmkslone_mvl #define _vel_pvfmksupne_mvl __builtin_ve_vl_pvfmksupne_mvl #define _vel_pvfmkslone_mvml __builtin_ve_vl_pvfmkslone_mvml #define _vel_pvfmksupne_mvml __builtin_ve_vl_pvfmksupne_mvml #define _vel_pvfmksloeq_mvl __builtin_ve_vl_pvfmksloeq_mvl #define _vel_pvfmksupeq_mvl __builtin_ve_vl_pvfmksupeq_mvl #define _vel_pvfmksloeq_mvml __builtin_ve_vl_pvfmksloeq_mvml #define _vel_pvfmksupeq_mvml __builtin_ve_vl_pvfmksupeq_mvml #define _vel_pvfmksloge_mvl __builtin_ve_vl_pvfmksloge_mvl #define _vel_pvfmksupge_mvl __builtin_ve_vl_pvfmksupge_mvl #define _vel_pvfmksloge_mvml __builtin_ve_vl_pvfmksloge_mvml #define _vel_pvfmksupge_mvml __builtin_ve_vl_pvfmksupge_mvml #define _vel_pvfmkslole_mvl __builtin_ve_vl_pvfmkslole_mvl #define _vel_pvfmksuple_mvl __builtin_ve_vl_pvfmksuple_mvl #define _vel_pvfmkslole_mvml __builtin_ve_vl_pvfmkslole_mvml #define _vel_pvfmksuple_mvml __builtin_ve_vl_pvfmksuple_mvml #define _vel_pvfmkslonum_mvl __builtin_ve_vl_pvfmkslonum_mvl #define _vel_pvfmksupnum_mvl __builtin_ve_vl_pvfmksupnum_mvl #define _vel_pvfmkslonum_mvml __builtin_ve_vl_pvfmkslonum_mvml #define _vel_pvfmksupnum_mvml __builtin_ve_vl_pvfmksupnum_mvml #define _vel_pvfmkslonan_mvl __builtin_ve_vl_pvfmkslonan_mvl #define _vel_pvfmksupnan_mvl 
__builtin_ve_vl_pvfmksupnan_mvl #define _vel_pvfmkslonan_mvml __builtin_ve_vl_pvfmkslonan_mvml #define _vel_pvfmksupnan_mvml __builtin_ve_vl_pvfmksupnan_mvml #define _vel_pvfmkslogtnan_mvl __builtin_ve_vl_pvfmkslogtnan_mvl #define _vel_pvfmksupgtnan_mvl __builtin_ve_vl_pvfmksupgtnan_mvl #define _vel_pvfmkslogtnan_mvml __builtin_ve_vl_pvfmkslogtnan_mvml #define _vel_pvfmksupgtnan_mvml __builtin_ve_vl_pvfmksupgtnan_mvml #define _vel_pvfmksloltnan_mvl __builtin_ve_vl_pvfmksloltnan_mvl #define _vel_pvfmksupltnan_mvl __builtin_ve_vl_pvfmksupltnan_mvl #define _vel_pvfmksloltnan_mvml __builtin_ve_vl_pvfmksloltnan_mvml #define _vel_pvfmksupltnan_mvml __builtin_ve_vl_pvfmksupltnan_mvml #define _vel_pvfmkslonenan_mvl __builtin_ve_vl_pvfmkslonenan_mvl #define _vel_pvfmksupnenan_mvl __builtin_ve_vl_pvfmksupnenan_mvl #define _vel_pvfmkslonenan_mvml __builtin_ve_vl_pvfmkslonenan_mvml #define _vel_pvfmksupnenan_mvml __builtin_ve_vl_pvfmksupnenan_mvml #define _vel_pvfmksloeqnan_mvl __builtin_ve_vl_pvfmksloeqnan_mvl #define _vel_pvfmksupeqnan_mvl __builtin_ve_vl_pvfmksupeqnan_mvl #define _vel_pvfmksloeqnan_mvml __builtin_ve_vl_pvfmksloeqnan_mvml #define _vel_pvfmksupeqnan_mvml __builtin_ve_vl_pvfmksupeqnan_mvml #define _vel_pvfmkslogenan_mvl __builtin_ve_vl_pvfmkslogenan_mvl #define _vel_pvfmksupgenan_mvl __builtin_ve_vl_pvfmksupgenan_mvl #define _vel_pvfmkslogenan_mvml __builtin_ve_vl_pvfmkslogenan_mvml #define _vel_pvfmksupgenan_mvml __builtin_ve_vl_pvfmksupgenan_mvml #define _vel_pvfmkslolenan_mvl __builtin_ve_vl_pvfmkslolenan_mvl #define _vel_pvfmksuplenan_mvl __builtin_ve_vl_pvfmksuplenan_mvl #define _vel_pvfmkslolenan_mvml __builtin_ve_vl_pvfmkslolenan_mvml #define _vel_pvfmksuplenan_mvml __builtin_ve_vl_pvfmksuplenan_mvml #define _vel_pvfmksgt_Mvl __builtin_ve_vl_pvfmksgt_Mvl #define _vel_pvfmksgt_MvMl __builtin_ve_vl_pvfmksgt_MvMl #define _vel_pvfmkslt_Mvl __builtin_ve_vl_pvfmkslt_Mvl #define _vel_pvfmkslt_MvMl __builtin_ve_vl_pvfmkslt_MvMl #define _vel_pvfmksne_Mvl 
__builtin_ve_vl_pvfmksne_Mvl #define _vel_pvfmksne_MvMl __builtin_ve_vl_pvfmksne_MvMl #define _vel_pvfmkseq_Mvl __builtin_ve_vl_pvfmkseq_Mvl #define _vel_pvfmkseq_MvMl __builtin_ve_vl_pvfmkseq_MvMl #define _vel_pvfmksge_Mvl __builtin_ve_vl_pvfmksge_Mvl #define _vel_pvfmksge_MvMl __builtin_ve_vl_pvfmksge_MvMl #define _vel_pvfmksle_Mvl __builtin_ve_vl_pvfmksle_Mvl #define _vel_pvfmksle_MvMl __builtin_ve_vl_pvfmksle_MvMl #define _vel_pvfmksnum_Mvl __builtin_ve_vl_pvfmksnum_Mvl #define _vel_pvfmksnum_MvMl __builtin_ve_vl_pvfmksnum_MvMl #define _vel_pvfmksnan_Mvl __builtin_ve_vl_pvfmksnan_Mvl #define _vel_pvfmksnan_MvMl __builtin_ve_vl_pvfmksnan_MvMl #define _vel_pvfmksgtnan_Mvl __builtin_ve_vl_pvfmksgtnan_Mvl #define _vel_pvfmksgtnan_MvMl __builtin_ve_vl_pvfmksgtnan_MvMl #define _vel_pvfmksltnan_Mvl __builtin_ve_vl_pvfmksltnan_Mvl #define _vel_pvfmksltnan_MvMl __builtin_ve_vl_pvfmksltnan_MvMl #define _vel_pvfmksnenan_Mvl __builtin_ve_vl_pvfmksnenan_Mvl #define _vel_pvfmksnenan_MvMl __builtin_ve_vl_pvfmksnenan_MvMl #define _vel_pvfmkseqnan_Mvl __builtin_ve_vl_pvfmkseqnan_Mvl #define _vel_pvfmkseqnan_MvMl __builtin_ve_vl_pvfmkseqnan_MvMl #define _vel_pvfmksgenan_Mvl __builtin_ve_vl_pvfmksgenan_Mvl #define _vel_pvfmksgenan_MvMl __builtin_ve_vl_pvfmksgenan_MvMl #define _vel_pvfmkslenan_Mvl __builtin_ve_vl_pvfmkslenan_Mvl #define _vel_pvfmkslenan_MvMl __builtin_ve_vl_pvfmkslenan_MvMl #define _vel_vsumwsx_vvl __builtin_ve_vl_vsumwsx_vvl #define _vel_vsumwsx_vvml __builtin_ve_vl_vsumwsx_vvml #define _vel_vsumwzx_vvl __builtin_ve_vl_vsumwzx_vvl #define _vel_vsumwzx_vvml __builtin_ve_vl_vsumwzx_vvml #define _vel_vsuml_vvl __builtin_ve_vl_vsuml_vvl #define _vel_vsuml_vvml __builtin_ve_vl_vsuml_vvml #define _vel_vfsumd_vvl __builtin_ve_vl_vfsumd_vvl #define _vel_vfsumd_vvml __builtin_ve_vl_vfsumd_vvml #define _vel_vfsums_vvl __builtin_ve_vl_vfsums_vvl #define _vel_vfsums_vvml __builtin_ve_vl_vfsums_vvml #define _vel_vrmaxswfstsx_vvl __builtin_ve_vl_vrmaxswfstsx_vvl #define 
_vel_vrmaxswfstsx_vvvl __builtin_ve_vl_vrmaxswfstsx_vvvl #define _vel_vrmaxswlstsx_vvl __builtin_ve_vl_vrmaxswlstsx_vvl #define _vel_vrmaxswlstsx_vvvl __builtin_ve_vl_vrmaxswlstsx_vvvl #define _vel_vrmaxswfstzx_vvl __builtin_ve_vl_vrmaxswfstzx_vvl #define _vel_vrmaxswfstzx_vvvl __builtin_ve_vl_vrmaxswfstzx_vvvl #define _vel_vrmaxswlstzx_vvl __builtin_ve_vl_vrmaxswlstzx_vvl #define _vel_vrmaxswlstzx_vvvl __builtin_ve_vl_vrmaxswlstzx_vvvl #define _vel_vrminswfstsx_vvl __builtin_ve_vl_vrminswfstsx_vvl #define _vel_vrminswfstsx_vvvl __builtin_ve_vl_vrminswfstsx_vvvl #define _vel_vrminswlstsx_vvl __builtin_ve_vl_vrminswlstsx_vvl #define _vel_vrminswlstsx_vvvl __builtin_ve_vl_vrminswlstsx_vvvl #define _vel_vrminswfstzx_vvl __builtin_ve_vl_vrminswfstzx_vvl #define _vel_vrminswfstzx_vvvl __builtin_ve_vl_vrminswfstzx_vvvl #define _vel_vrminswlstzx_vvl __builtin_ve_vl_vrminswlstzx_vvl #define _vel_vrminswlstzx_vvvl __builtin_ve_vl_vrminswlstzx_vvvl #define _vel_vrmaxslfst_vvl __builtin_ve_vl_vrmaxslfst_vvl #define _vel_vrmaxslfst_vvvl __builtin_ve_vl_vrmaxslfst_vvvl #define _vel_vrmaxsllst_vvl __builtin_ve_vl_vrmaxsllst_vvl #define _vel_vrmaxsllst_vvvl __builtin_ve_vl_vrmaxsllst_vvvl #define _vel_vrminslfst_vvl __builtin_ve_vl_vrminslfst_vvl #define _vel_vrminslfst_vvvl __builtin_ve_vl_vrminslfst_vvvl #define _vel_vrminsllst_vvl __builtin_ve_vl_vrminsllst_vvl #define _vel_vrminsllst_vvvl __builtin_ve_vl_vrminsllst_vvvl #define _vel_vfrmaxdfst_vvl __builtin_ve_vl_vfrmaxdfst_vvl #define _vel_vfrmaxdfst_vvvl __builtin_ve_vl_vfrmaxdfst_vvvl #define _vel_vfrmaxdlst_vvl __builtin_ve_vl_vfrmaxdlst_vvl #define _vel_vfrmaxdlst_vvvl __builtin_ve_vl_vfrmaxdlst_vvvl #define _vel_vfrmaxsfst_vvl __builtin_ve_vl_vfrmaxsfst_vvl #define _vel_vfrmaxsfst_vvvl __builtin_ve_vl_vfrmaxsfst_vvvl #define _vel_vfrmaxslst_vvl __builtin_ve_vl_vfrmaxslst_vvl #define _vel_vfrmaxslst_vvvl __builtin_ve_vl_vfrmaxslst_vvvl #define _vel_vfrmindfst_vvl __builtin_ve_vl_vfrmindfst_vvl #define 
_vel_vfrmindfst_vvvl __builtin_ve_vl_vfrmindfst_vvvl #define _vel_vfrmindlst_vvl __builtin_ve_vl_vfrmindlst_vvl #define _vel_vfrmindlst_vvvl __builtin_ve_vl_vfrmindlst_vvvl #define _vel_vfrminsfst_vvl __builtin_ve_vl_vfrminsfst_vvl #define _vel_vfrminsfst_vvvl __builtin_ve_vl_vfrminsfst_vvvl #define _vel_vfrminslst_vvl __builtin_ve_vl_vfrminslst_vvl #define _vel_vfrminslst_vvvl __builtin_ve_vl_vfrminslst_vvvl #define _vel_vrand_vvl __builtin_ve_vl_vrand_vvl #define _vel_vrand_vvml __builtin_ve_vl_vrand_vvml #define _vel_vror_vvl __builtin_ve_vl_vror_vvl #define _vel_vror_vvml __builtin_ve_vl_vror_vvml #define _vel_vrxor_vvl __builtin_ve_vl_vrxor_vvl #define _vel_vrxor_vvml __builtin_ve_vl_vrxor_vvml #define _vel_vgt_vvssl __builtin_ve_vl_vgt_vvssl #define _vel_vgt_vvssvl __builtin_ve_vl_vgt_vvssvl #define _vel_vgt_vvssml __builtin_ve_vl_vgt_vvssml #define _vel_vgt_vvssmvl __builtin_ve_vl_vgt_vvssmvl #define _vel_vgtnc_vvssl __builtin_ve_vl_vgtnc_vvssl #define _vel_vgtnc_vvssvl __builtin_ve_vl_vgtnc_vvssvl #define _vel_vgtnc_vvssml __builtin_ve_vl_vgtnc_vvssml #define _vel_vgtnc_vvssmvl __builtin_ve_vl_vgtnc_vvssmvl #define _vel_vgtu_vvssl __builtin_ve_vl_vgtu_vvssl #define _vel_vgtu_vvssvl __builtin_ve_vl_vgtu_vvssvl #define _vel_vgtu_vvssml __builtin_ve_vl_vgtu_vvssml #define _vel_vgtu_vvssmvl __builtin_ve_vl_vgtu_vvssmvl #define _vel_vgtunc_vvssl __builtin_ve_vl_vgtunc_vvssl #define _vel_vgtunc_vvssvl __builtin_ve_vl_vgtunc_vvssvl #define _vel_vgtunc_vvssml __builtin_ve_vl_vgtunc_vvssml #define _vel_vgtunc_vvssmvl __builtin_ve_vl_vgtunc_vvssmvl #define _vel_vgtlsx_vvssl __builtin_ve_vl_vgtlsx_vvssl #define _vel_vgtlsx_vvssvl __builtin_ve_vl_vgtlsx_vvssvl #define _vel_vgtlsx_vvssml __builtin_ve_vl_vgtlsx_vvssml #define _vel_vgtlsx_vvssmvl __builtin_ve_vl_vgtlsx_vvssmvl #define _vel_vgtlsxnc_vvssl __builtin_ve_vl_vgtlsxnc_vvssl #define _vel_vgtlsxnc_vvssvl __builtin_ve_vl_vgtlsxnc_vvssvl #define _vel_vgtlsxnc_vvssml __builtin_ve_vl_vgtlsxnc_vvssml #define 
_vel_vgtlsxnc_vvssmvl __builtin_ve_vl_vgtlsxnc_vvssmvl #define _vel_vgtlzx_vvssl __builtin_ve_vl_vgtlzx_vvssl #define _vel_vgtlzx_vvssvl __builtin_ve_vl_vgtlzx_vvssvl #define _vel_vgtlzx_vvssml __builtin_ve_vl_vgtlzx_vvssml #define _vel_vgtlzx_vvssmvl __builtin_ve_vl_vgtlzx_vvssmvl #define _vel_vgtlzxnc_vvssl __builtin_ve_vl_vgtlzxnc_vvssl #define _vel_vgtlzxnc_vvssvl __builtin_ve_vl_vgtlzxnc_vvssvl #define _vel_vgtlzxnc_vvssml __builtin_ve_vl_vgtlzxnc_vvssml #define _vel_vgtlzxnc_vvssmvl __builtin_ve_vl_vgtlzxnc_vvssmvl #define _vel_vsc_vvssl __builtin_ve_vl_vsc_vvssl #define _vel_vsc_vvssml __builtin_ve_vl_vsc_vvssml #define _vel_vscnc_vvssl __builtin_ve_vl_vscnc_vvssl #define _vel_vscnc_vvssml __builtin_ve_vl_vscnc_vvssml #define _vel_vscot_vvssl __builtin_ve_vl_vscot_vvssl #define _vel_vscot_vvssml __builtin_ve_vl_vscot_vvssml #define _vel_vscncot_vvssl __builtin_ve_vl_vscncot_vvssl #define _vel_vscncot_vvssml __builtin_ve_vl_vscncot_vvssml #define _vel_vscu_vvssl __builtin_ve_vl_vscu_vvssl #define _vel_vscu_vvssml __builtin_ve_vl_vscu_vvssml #define _vel_vscunc_vvssl __builtin_ve_vl_vscunc_vvssl #define _vel_vscunc_vvssml __builtin_ve_vl_vscunc_vvssml #define _vel_vscuot_vvssl __builtin_ve_vl_vscuot_vvssl #define _vel_vscuot_vvssml __builtin_ve_vl_vscuot_vvssml #define _vel_vscuncot_vvssl __builtin_ve_vl_vscuncot_vvssl #define _vel_vscuncot_vvssml __builtin_ve_vl_vscuncot_vvssml #define _vel_vscl_vvssl __builtin_ve_vl_vscl_vvssl #define _vel_vscl_vvssml __builtin_ve_vl_vscl_vvssml #define _vel_vsclnc_vvssl __builtin_ve_vl_vsclnc_vvssl #define _vel_vsclnc_vvssml __builtin_ve_vl_vsclnc_vvssml #define _vel_vsclot_vvssl __builtin_ve_vl_vsclot_vvssl #define _vel_vsclot_vvssml __builtin_ve_vl_vsclot_vvssml #define _vel_vsclncot_vvssl __builtin_ve_vl_vsclncot_vvssl #define _vel_vsclncot_vvssml __builtin_ve_vl_vsclncot_vvssml #define _vel_andm_mmm __builtin_ve_vl_andm_mmm #define _vel_andm_MMM __builtin_ve_vl_andm_MMM #define _vel_orm_mmm __builtin_ve_vl_orm_mmm 
#define _vel_orm_MMM __builtin_ve_vl_orm_MMM #define _vel_xorm_mmm __builtin_ve_vl_xorm_mmm #define _vel_xorm_MMM __builtin_ve_vl_xorm_MMM #define _vel_eqvm_mmm __builtin_ve_vl_eqvm_mmm #define _vel_eqvm_MMM __builtin_ve_vl_eqvm_MMM #define _vel_nndm_mmm __builtin_ve_vl_nndm_mmm #define _vel_nndm_MMM __builtin_ve_vl_nndm_MMM #define _vel_negm_mm __builtin_ve_vl_negm_mm #define _vel_negm_MM __builtin_ve_vl_negm_MM #define _vel_pcvm_sml __builtin_ve_vl_pcvm_sml #define _vel_lzvm_sml __builtin_ve_vl_lzvm_sml #define _vel_tovm_sml __builtin_ve_vl_tovm_sml #define _vel_lcr_sss __builtin_ve_vl_lcr_sss #define _vel_scr_sss __builtin_ve_vl_scr_sss #define _vel_tscr_ssss __builtin_ve_vl_tscr_ssss #define _vel_fidcr_sss __builtin_ve_vl_fidcr_sss #define _vel_fencei __builtin_ve_vl_fencei #define _vel_fencem_s __builtin_ve_vl_fencem_s #define _vel_fencec_s __builtin_ve_vl_fencec_s #define _vel_svob __builtin_ve_vl_svob /*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------=== * * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __VPCLMULQDQINTRIN_H #define __VPCLMULQDQINTRIN_H #define _mm256_clmulepi64_epi128(A, B, I) \ ((__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (char)(I))) #ifdef __AVX512FINTRIN_H #define _mm512_clmulepi64_epi128(A, B, I) \ ((__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (char)(I))) #endif // __AVX512FINTRIN_H #endif /* __VPCLMULQDQINTRIN_H */ /*===----------------------- waitpkgintrin.h - WAITPKG --------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
* See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __WAITPKGINTRIN_H #define __WAITPKGINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("waitpkg"))) static __inline__ void __DEFAULT_FN_ATTRS _umonitor (void * __address) { __builtin_ia32_umonitor (__address); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _umwait (unsigned int __control, unsigned long long __counter) { return __builtin_ia32_umwait (__control, (unsigned int)(__counter >> 32), (unsigned int)__counter); } static __inline__ unsigned char __DEFAULT_FN_ATTRS _tpause (unsigned int __control, unsigned long long __counter) { return __builtin_ia32_tpause (__control, (unsigned int)(__counter >> 32), (unsigned int)__counter); } #undef __DEFAULT_FN_ATTRS #endif /* __WAITPKGINTRIN_H */ /*===---- wasm_simd128.h - WebAssembly portable SIMD intrinsics ------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __WASM_SIMD128_H #define __WASM_SIMD128_H #include #include // User-facing type typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16))); // Internal types determined by clang builtin definitions typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); typedef signed char __i8x16 __attribute__((__vector_size__(16), __aligned__(16))); typedef unsigned char __u8x16 __attribute__((__vector_size__(16), __aligned__(16))); typedef short __i16x8 __attribute__((__vector_size__(16), __aligned__(16))); typedef unsigned short __u16x8 __attribute__((__vector_size__(16), __aligned__(16))); typedef int __i32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef unsigned int __u32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16))); typedef unsigned long long __u64x2 __attribute__((__vector_size__(16), __aligned__(16))); typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8))); typedef unsigned char __u8x8 __attribute__((__vector_size__(8), __aligned__(8))); typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8))); typedef unsigned short __u16x4 __attribute__((__vector_size__(8), __aligned__(8))); typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8))); typedef unsigned int __u32x2 __attribute__((__vector_size__(8), __aligned__(8))); typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8))); #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("simd128"), \ __min_vector_width__(128))) #define __REQUIRE_CONSTANT(c) \ 
__attribute__((__diagnose_if__(!__builtin_constant_p(c), \ #c " must be constant", "error"))) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void *__mem) { // UB-free unaligned access copied from xmmintrin.h struct __wasm_v128_load_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __wasm_v128_load_struct *)__mem)->__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_splat(const void *__mem) { struct __wasm_v128_load8_splat_struct { uint8_t __v; } __attribute__((__packed__, __may_alias__)); uint8_t __v = ((const struct __wasm_v128_load8_splat_struct *)__mem)->__v; return (v128_t)(__u8x16){__v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load16_splat(const void *__mem) { struct __wasm_v128_load16_splat_struct { uint16_t __v; } __attribute__((__packed__, __may_alias__)); uint16_t __v = ((const struct __wasm_v128_load16_splat_struct *)__mem)->__v; return (v128_t)(__u16x8){__v, __v, __v, __v, __v, __v, __v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_splat(const void *__mem) { struct __wasm_v128_load32_splat_struct { uint32_t __v; } __attribute__((__packed__, __may_alias__)); uint32_t __v = ((const struct __wasm_v128_load32_splat_struct *)__mem)->__v; return (v128_t)(__u32x4){__v, __v, __v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load64_splat(const void *__mem) { struct __wasm_v128_load64_splat_struct { uint64_t __v; } __attribute__((__packed__, __may_alias__)); uint64_t __v = ((const struct __wasm_v128_load64_splat_struct *)__mem)->__v; return (v128_t)(__u64x2){__v, __v}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_load8x8(const void *__mem) { struct __wasm_i16x8_load8x8_struct { __i8x8 __v; } __attribute__((__packed__, __may_alias__)); __i8x8 __v = ((const struct __wasm_i16x8_load8x8_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, 
__i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_load8x8(const void *__mem) { struct __wasm_u16x8_load8x8_struct { __u8x8 __v; } __attribute__((__packed__, __may_alias__)); __u8x8 __v = ((const struct __wasm_u16x8_load8x8_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_load16x4(const void *__mem) { struct __wasm_i32x4_load16x4_struct { __i16x4 __v; } __attribute__((__packed__, __may_alias__)); __i16x4 __v = ((const struct __wasm_i32x4_load16x4_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_load16x4(const void *__mem) { struct __wasm_u32x4_load16x4_struct { __u16x4 __v; } __attribute__((__packed__, __may_alias__)); __u16x4 __v = ((const struct __wasm_u32x4_load16x4_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_load32x2(const void *__mem) { struct __wasm_i64x2_load32x2_struct { __i32x2 __v; } __attribute__((__packed__, __may_alias__)); __i32x2 __v = ((const struct __wasm_i64x2_load32x2_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __i64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_load32x2(const void *__mem) { struct __wasm_u64x2_load32x2_struct { __u32x2 __v; } __attribute__((__packed__, __may_alias__)); __u32x2 __v = ((const struct __wasm_u64x2_load32x2_struct *)__mem)->__v; return (v128_t) __builtin_convertvector(__v, __u64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_zero(const void *__mem) { struct __wasm_v128_load32_zero_struct { int32_t __v; } __attribute__((__packed__, __may_alias__)); int32_t __v = ((const struct __wasm_v128_load32_zero_struct *)__mem)->__v; return (v128_t)(__i32x4){__v, 0, 0, 0}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load64_zero(const void *__mem) { struct __wasm_v128_load64_zero_struct { 
int64_t __v; } __attribute__((__packed__, __may_alias__)); int64_t __v = ((const struct __wasm_v128_load64_zero_struct *)__mem)->__v; return (v128_t)(__i64x2){__v, 0}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load8_lane( const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_load8_lane_struct { int8_t __v; } __attribute__((__packed__, __may_alias__)); int8_t __v = ((const struct __wasm_v128_load8_lane_struct *)__mem)->__v; __i8x16 __ret = (__i8x16)__vec; __ret[__i] = __v; return (v128_t)__ret; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load16_lane( const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_load16_lane_struct { int16_t __v; } __attribute__((__packed__, __may_alias__)); int16_t __v = ((const struct __wasm_v128_load16_lane_struct *)__mem)->__v; __i16x8 __ret = (__i16x8)__vec; __ret[__i] = __v; return (v128_t)__ret; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load32_lane( const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_load32_lane_struct { int32_t __v; } __attribute__((__packed__, __may_alias__)); int32_t __v = ((const struct __wasm_v128_load32_lane_struct *)__mem)->__v; __i32x4 __ret = (__i32x4)__vec; __ret[__i] = __v; return (v128_t)__ret; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load64_lane( const void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_load64_lane_struct { int64_t __v; } __attribute__((__packed__, __may_alias__)); int64_t __v = ((const struct __wasm_v128_load64_lane_struct *)__mem)->__v; __i64x2 __ret = (__i64x2)__vec; __ret[__i] = __v; return (v128_t)__ret; } static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void *__mem, v128_t __a) { // UB-free unaligned access copied from xmmintrin.h struct __wasm_v128_store_struct { __v128_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __wasm_v128_store_struct *)__mem)->__v = __a; } static 
__inline__ void __DEFAULT_FN_ATTRS wasm_v128_store8_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_store8_lane_struct { int8_t __v; } __attribute__((__packed__, __may_alias__)); ((struct __wasm_v128_store8_lane_struct *)__mem)->__v = ((__i8x16)__vec)[__i]; } static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store16_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_store16_lane_struct { int16_t __v; } __attribute__((__packed__, __may_alias__)); ((struct __wasm_v128_store16_lane_struct *)__mem)->__v = ((__i16x8)__vec)[__i]; } static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store32_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_store32_lane_struct { int32_t __v; } __attribute__((__packed__, __may_alias__)); ((struct __wasm_v128_store32_lane_struct *)__mem)->__v = ((__i32x4)__vec)[__i]; } static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store64_lane(void *__mem, v128_t __vec, int __i) __REQUIRE_CONSTANT(__i) { struct __wasm_v128_store64_lane_struct { int64_t __v; } __attribute__((__packed__, __may_alias__)); ((struct __wasm_v128_store64_lane_struct *)__mem)->__v = ((__i64x2)__vec)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_make(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, int8_t __c4, int8_t __c5, int8_t __c6, int8_t __c7, int8_t __c8, int8_t __c9, int8_t __c10, int8_t __c11, int8_t __c12, int8_t __c13, int8_t __c14, int8_t __c15) { return (v128_t)(__i8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_make(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3, uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7, uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11, uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) { return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, 
__c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_make(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) { return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_make(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3, uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) { return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3) { return (v128_t)(__i32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_make(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3) { return (v128_t)(__u32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t __c0, int64_t __c1) { return (v128_t)(__i64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_make(uint64_t __c0, uint64_t __c1) { return (v128_t)(__u64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float __c0, float __c1, float __c2, float __c3) { return (v128_t)(__f32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_make(double __c0, double __c1) { return (v128_t)(__f64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, int8_t __c4, int8_t __c5, int8_t __c6, int8_t __c7, int8_t __c8, int8_t __c9, int8_t __c10, int8_t __c11, int8_t __c12, int8_t __c13, int8_t __c14, int8_t __c15) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) __REQUIRE_CONSTANT(__c8) __REQUIRE_CONSTANT(__c9) 
__REQUIRE_CONSTANT(__c10) __REQUIRE_CONSTANT(__c11) __REQUIRE_CONSTANT(__c12) __REQUIRE_CONSTANT(__c13) __REQUIRE_CONSTANT(__c14) __REQUIRE_CONSTANT(__c15) { return (v128_t)(__i8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_const(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3, uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7, uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11, uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) __REQUIRE_CONSTANT(__c8) __REQUIRE_CONSTANT(__c9) __REQUIRE_CONSTANT(__c10) __REQUIRE_CONSTANT(__c11) __REQUIRE_CONSTANT(__c12) __REQUIRE_CONSTANT(__c13) __REQUIRE_CONSTANT(__c14) __REQUIRE_CONSTANT(__c15) { return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) { return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_const(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3, uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) __REQUIRE_CONSTANT(__c7) { return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, 
__c4, __c5, __c6, __c7}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) { return (v128_t)(__i32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_const(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) { return (v128_t)(__u32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const(int64_t __c0, int64_t __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__i64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const(uint64_t __c0, uint64_t __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__u64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const(float __c0, float __c1, float __c2, float __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) __REQUIRE_CONSTANT(__c3) { return (v128_t)(__f32x4){__c0, __c1, __c2, __c3}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_const(double __c0, double __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__f64x2){__c0, __c1}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const_splat(int8_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i8x16){__c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_const_splat(uint8_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u8x16){__c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const_splat(int16_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i16x8){__c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ 
v128_t __DEFAULT_FN_ATTRS wasm_u16x8_const_splat(uint16_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u16x8){__c, __c, __c, __c, __c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const_splat(int32_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i32x4){__c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_const_splat(uint32_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u32x4){__c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const_splat(int64_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i64x2){__c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const_splat(uint64_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__u64x2){__c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const_splat(float __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__f32x4){__c, __c, __c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_const_splat(double __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__f64x2){__c, __c}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t __a) { return (v128_t)(__i8x16){__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_splat(uint8_t __a) { return (v128_t)(__u8x16){__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ int8_t __DEFAULT_FN_ATTRS wasm_i8x16_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i8x16)__a)[__i]; } static __inline__ uint8_t __DEFAULT_FN_ATTRS wasm_u8x16_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u8x16)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_replace_lane(v128_t __a, int __i, int8_t __b) __REQUIRE_CONSTANT(__i) { __i8x16 __v = (__i8x16)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_replace_lane(v128_t __a, int __i, uint8_t 
__b) __REQUIRE_CONSTANT(__i) { __u8x16 __v = (__u8x16)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t __a) { return (v128_t)(__i16x8){__a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_splat(uint16_t __a) { return (v128_t)(__u16x8){__a, __a, __a, __a, __a, __a, __a, __a}; } static __inline__ int16_t __DEFAULT_FN_ATTRS wasm_i16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i16x8)__a)[__i]; } static __inline__ uint16_t __DEFAULT_FN_ATTRS wasm_u16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u16x8)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_replace_lane(v128_t __a, int __i, int16_t __b) __REQUIRE_CONSTANT(__i) { __i16x8 __v = (__i16x8)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_replace_lane( v128_t __a, int __i, uint16_t __b) __REQUIRE_CONSTANT(__i) { __u16x8 __v = (__u16x8)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t __a) { return (v128_t)(__i32x4){__a, __a, __a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_splat(uint32_t __a) { return (v128_t)(__u32x4){__a, __a, __a, __a}; } static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i32x4)__a)[__i]; } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_u32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u32x4)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a, int __i, int32_t __b) __REQUIRE_CONSTANT(__i) { __i32x4 __v = (__i32x4)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_replace_lane( v128_t __a, int __i, uint32_t __b) __REQUIRE_CONSTANT(__i) { __u32x4 __v = (__u32x4)__a; __v[__i] = __b; return (v128_t)__v; 
} static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t __a) { return (v128_t)(__i64x2){__a, __a}; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_splat(uint64_t __a) { return (v128_t)(__u64x2){__a, __a}; } static __inline__ int64_t __DEFAULT_FN_ATTRS wasm_i64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i64x2)__a)[__i]; } static __inline__ uint64_t __DEFAULT_FN_ATTRS wasm_u64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__u64x2)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a, int __i, int64_t __b) __REQUIRE_CONSTANT(__i) { __i64x2 __v = (__i64x2)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_replace_lane( v128_t __a, int __i, uint64_t __b) __REQUIRE_CONSTANT(__i) { __u64x2 __v = (__u64x2)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float __a) { return (v128_t)(__f32x4){__a, __a, __a, __a}; } static __inline__ float __DEFAULT_FN_ATTRS wasm_f32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__f32x4)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_replace_lane(v128_t __a, int __i, float __b) __REQUIRE_CONSTANT(__i) { __f32x4 __v = (__f32x4)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double __a) { return (v128_t)(__f64x2){__a, __a}; } static __inline__ double __DEFAULT_FN_ATTRS wasm_f64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__f64x2)__a)[__i]; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_replace_lane(v128_t __a, int __i, double __b) __REQUIRE_CONSTANT(__i) { __f64x2 __v = (__f64x2)__a; __v[__i] = __b; return (v128_t)__v; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a == (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i8x16_ne(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a != (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a < (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a < (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a > (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a > (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a <= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a <= (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t __a, v128_t __b) { return (v128_t)((__i8x16)__a >= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a >= (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_eq(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a == (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ne(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a != (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_lt(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a < (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_lt(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a < (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_gt(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a > (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_gt(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a > (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i16x8_le(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a <= (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_le(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a <= (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ge(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a >= (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_ge(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a >= (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_eq(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a == (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ne(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a != (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_lt(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a < (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_lt(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a < (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_gt(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a > (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_gt(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a > (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_le(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a <= (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_le(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a <= (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ge(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)__a >= (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_ge(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a >= (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_eq(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a == (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i64x2_ne(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a != (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_lt(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a < (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_gt(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a > (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_le(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a <= (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_ge(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)__a >= (__i64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_eq(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a == (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ne(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a != (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_lt(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a < (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_gt(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a > (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_le(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a <= (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ge(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a >= (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_eq(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a == (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ne(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a != (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_lt(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a < (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_gt(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a > (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_f64x2_le(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a <= (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ge(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a >= (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_not(v128_t __a) { return ~__a; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_and(v128_t __a, v128_t __b) { return __a & __b; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_or(v128_t __a, v128_t __b) { return __a | __b; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_xor(v128_t __a, v128_t __b) { return __a ^ __b; } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_andnot(v128_t __a, v128_t __b) { return __a & ~__b; } static __inline__ bool __DEFAULT_FN_ATTRS wasm_v128_any_true(v128_t __a) { return __builtin_wasm_any_true_v128((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t __a, v128_t __b, v128_t __mask) { return (v128_t)__builtin_wasm_bitselect((__i32x4)__a, (__i32x4)__b, (__i32x4)__mask); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i8x16((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_neg(v128_t __a) { return (v128_t)(-(__u8x16)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t __a) { return __builtin_wasm_all_true_i8x16((__i8x16)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i8x16((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) { return (v128_t)__builtin_wasm_popcnt_i8x16((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i8x16)__a << (__b & 0x7)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i8x16)__a >> (__b & 0x7)); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_u8x16_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u8x16)__a >> (__b & 0x7)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a + (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t __a, v128_t __b) { return (v128_t)((__u8x16)__a - (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_s_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_avgr(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_avgr_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i16x8((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i16x8_neg(v128_t __a) { return (v128_t)(-(__u16x8)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t __a) { return __builtin_wasm_all_true_i16x8((__i16x8)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i16x8((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i16x8)__a << (__b & 0xF)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i16x8)__a >> (__b & 0xF)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u16x8)__a >> (__b & 0xF)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a + (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_add_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)__a - (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_sub_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)__a * (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_u16x8_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_avgr(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_avgr_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i32x4((__i32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_neg(v128_t __a) { return (v128_t)(-(__u32x4)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t __a) { return __builtin_wasm_all_true_i32x4((__i32x4)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i32x4((__i32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i32x4)__a << (__b & 0x1F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i32x4)__a >> (__b & 0x1F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u32x4)__a >> (__b & 0x1F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a + (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_sub(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a - (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)__a * (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min(v128_t __a, v128_t __b) { return 
(v128_t)__builtin_wasm_min_s_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_u_i32x4((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_s_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_u_i32x4((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_dot_i16x8(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_dot_s_i32x4_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_i64x2((__i64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t __a) { return (v128_t)(-(__u64x2)__a); } static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t __a) { return __builtin_wasm_all_true_i64x2((__i64x2)__a); } static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i64x2((__i64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t __a, uint32_t __b) { return (v128_t)((__i64x2)__a << ((int64_t)__b & 0x3F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t __a, uint32_t __b) { return (v128_t)((__i64x2)__a >> ((int64_t)__b & 0x3F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t __a, uint32_t __b) { return (v128_t)((__u64x2)__a >> ((int64_t)__b & 0x3F)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)__a + (__u64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_sub(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)__a - (__u64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_mul(v128_t __a, v128_t __b) { 
return (v128_t)((__u64x2)__a * (__u64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_neg(v128_t __a) { return (v128_t)(-(__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(v128_t __a) { return (v128_t)__builtin_wasm_sqrt_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ceil(v128_t __a) { return (v128_t)__builtin_wasm_ceil_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_floor(v128_t __a) { return (v128_t)__builtin_wasm_floor_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_trunc(v128_t __a) { return (v128_t)__builtin_wasm_trunc_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_nearest(v128_t __a) { return (v128_t)__builtin_wasm_nearest_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_add(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a + (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sub(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a - (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_mul(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a * (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_div(v128_t __a, v128_t __b) { return (v128_t)((__f32x4)__a / (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t 
__DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_neg(v128_t __a) { return (v128_t)(-(__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(v128_t __a) { return (v128_t)__builtin_wasm_sqrt_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ceil(v128_t __a) { return (v128_t)__builtin_wasm_ceil_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_floor(v128_t __a) { return (v128_t)__builtin_wasm_floor_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_trunc(v128_t __a) { return (v128_t)__builtin_wasm_trunc_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_nearest(v128_t __a) { return (v128_t)__builtin_wasm_nearest_f64x2((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_add(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a + (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sub(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a - (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_mul(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a * (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_div(v128_t __a, v128_t __b) { return (v128_t)((__f64x2)__a / (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_min_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_max_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a, v128_t __b) { return 
(v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_trunc_sat_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_trunc_sat_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_trunc_saturate_u_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__i32x4)__a, __f32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__u32x4)__a, __f32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_convert_low_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__i32x2){__a[0], __a[1]}, __f64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_convert_low_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__u32x2){__a[0], __a[1]}, __f64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_trunc_sat_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_trunc_sat_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4((__f64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_demote_f64x2_zero(v128_t __a) { return (v128_t) __builtin_convertvector( __builtin_shufflevector((__f64x2)__a, (__f64x2){0, 0}, 0, 1, 2, 3), __f32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_promote_low_f32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__f32x2){((__f32x4)__a)[0], ((__f32x4)__a)[1]}, __f64x2); } #define wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7, __c8, 
__c9, __c10, __c11, __c12, __c13, \ __c14, __c15) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), __c0, __c1, __c2, __c3, __c4, __c5, \ __c6, __c7, __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15)) #define wasm_i16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), (__c0)*2, (__c0)*2 + 1, (__c1)*2, \ (__c1)*2 + 1, (__c2)*2, (__c2)*2 + 1, (__c3)*2, (__c3)*2 + 1, (__c4)*2, \ (__c4)*2 + 1, (__c5)*2, (__c5)*2 + 1, (__c6)*2, (__c6)*2 + 1, (__c7)*2, \ (__c7)*2 + 1)) #define wasm_i32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), (__c0)*4, (__c0)*4 + 1, (__c0)*4 + 2, \ (__c0)*4 + 3, (__c1)*4, (__c1)*4 + 1, (__c1)*4 + 2, (__c1)*4 + 3, \ (__c2)*4, (__c2)*4 + 1, (__c2)*4 + 2, (__c2)*4 + 3, (__c3)*4, \ (__c3)*4 + 1, (__c3)*4 + 2, (__c3)*4 + 3)) #define wasm_i64x2_shuffle(__a, __b, __c0, __c1) \ ((v128_t)__builtin_wasm_shuffle_i8x16( \ (__i8x16)(__a), (__i8x16)(__b), (__c0)*8, (__c0)*8 + 1, (__c0)*8 + 2, \ (__c0)*8 + 3, (__c0)*8 + 4, (__c0)*8 + 5, (__c0)*8 + 6, (__c0)*8 + 7, \ (__c1)*8, (__c1)*8 + 1, (__c1)*8 + 2, (__c1)*8 + 3, (__c1)*8 + 4, \ (__c1)*8 + 5, (__c1)*8 + 6, (__c1)*8 + 7)) static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_swizzle(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_swizzle_i8x16((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_narrow_i16x8(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_s_i8x16_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_narrow_i16x8(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_u_i8x16_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_narrow_i32x4(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_s_i16x8_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ 
v128_t __DEFAULT_FN_ATTRS wasm_u16x8_narrow_i32x4(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_narrow_u_i16x8_i32x4((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extend_low_i8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__i8x8){((__i8x16)__a)[0], ((__i8x16)__a)[1], ((__i8x16)__a)[2], ((__i8x16)__a)[3], ((__i8x16)__a)[4], ((__i8x16)__a)[5], ((__i8x16)__a)[6], ((__i8x16)__a)[7]}, __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extend_high_i8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__i8x8){((__i8x16)__a)[8], ((__i8x16)__a)[9], ((__i8x16)__a)[10], ((__i8x16)__a)[11], ((__i8x16)__a)[12], ((__i8x16)__a)[13], ((__i8x16)__a)[14], ((__i8x16)__a)[15]}, __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extend_low_u8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__u8x8){((__u8x16)__a)[0], ((__u8x16)__a)[1], ((__u8x16)__a)[2], ((__u8x16)__a)[3], ((__u8x16)__a)[4], ((__u8x16)__a)[5], ((__u8x16)__a)[6], ((__u8x16)__a)[7]}, __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extend_high_u8x16(v128_t __a) { return (v128_t) __builtin_convertvector( (__u8x8){((__u8x16)__a)[8], ((__u8x16)__a)[9], ((__u8x16)__a)[10], ((__u8x16)__a)[11], ((__u8x16)__a)[12], ((__u8x16)__a)[13], ((__u8x16)__a)[14], ((__u8x16)__a)[15]}, __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extend_low_i16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__i16x4){((__i16x8)__a)[0], ((__i16x8)__a)[1], ((__i16x8)__a)[2], ((__i16x8)__a)[3]}, __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extend_high_i16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__i16x4){((__i16x8)__a)[4], ((__i16x8)__a)[5], ((__i16x8)__a)[6], ((__i16x8)__a)[7]}, __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extend_low_u16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__u16x4){((__u16x8)__a)[0], 
((__u16x8)__a)[1], ((__u16x8)__a)[2], ((__u16x8)__a)[3]}, __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extend_high_u16x8(v128_t __a) { return (v128_t) __builtin_convertvector( (__u16x4){((__u16x8)__a)[4], ((__u16x8)__a)[5], ((__u16x8)__a)[6], ((__u16x8)__a)[7]}, __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extend_low_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__i32x2){((__i32x4)__a)[0], ((__i32x4)__a)[1]}, __i64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extend_high_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__i32x2){((__i32x4)__a)[2], ((__i32x4)__a)[3]}, __i64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extend_low_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__u32x2){((__u32x4)__a)[0], ((__u32x4)__a)[1]}, __u64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extend_high_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector( (__u32x2){((__u32x4)__a)[2], ((__u32x4)__a)[3]}, __u64x2); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extadd_pairwise_i8x16(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i8x16_s_i16x8((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extadd_pairwise_u8x16(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i8x16_u_i16x8((__u8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extadd_pairwise_i16x8(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i16x8_s_i32x4((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extadd_pairwise_u16x8(v128_t __a) { return (v128_t)__builtin_wasm_extadd_pairwise_i16x8_u_i32x4((__u16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_extmul_low_i8x16(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)wasm_i16x8_extend_low_i8x16(__a) * (__i16x8)wasm_i16x8_extend_low_i8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i16x8_extmul_high_i8x16(v128_t __a, v128_t __b) { return (v128_t)((__i16x8)wasm_i16x8_extend_high_i8x16(__a) * (__i16x8)wasm_i16x8_extend_high_i8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extmul_low_u8x16(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)wasm_u16x8_extend_low_u8x16(__a) * (__u16x8)wasm_u16x8_extend_low_u8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_extmul_high_u8x16(v128_t __a, v128_t __b) { return (v128_t)((__u16x8)wasm_u16x8_extend_high_u8x16(__a) * (__u16x8)wasm_u16x8_extend_high_u8x16(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extmul_low_i16x8(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)wasm_i32x4_extend_low_i16x8(__a) * (__i32x4)wasm_i32x4_extend_low_i16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_extmul_high_i16x8(v128_t __a, v128_t __b) { return (v128_t)((__i32x4)wasm_i32x4_extend_high_i16x8(__a) * (__i32x4)wasm_i32x4_extend_high_i16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extmul_low_u16x8(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)wasm_u32x4_extend_low_u16x8(__a) * (__u32x4)wasm_u32x4_extend_low_u16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_extmul_high_u16x8(v128_t __a, v128_t __b) { return (v128_t)((__u32x4)wasm_u32x4_extend_high_u16x8(__a) * (__u32x4)wasm_u32x4_extend_high_u16x8(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extmul_low_i32x4(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)wasm_i64x2_extend_low_i32x4(__a) * (__i64x2)wasm_i64x2_extend_low_i32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_extmul_high_i32x4(v128_t __a, v128_t __b) { return (v128_t)((__i64x2)wasm_i64x2_extend_high_i32x4(__a) * (__i64x2)wasm_i64x2_extend_high_i32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extmul_low_u32x4(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)wasm_u64x2_extend_low_u32x4(__a) * 
(__u64x2)wasm_u64x2_extend_low_u32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_extmul_high_u32x4(v128_t __a, v128_t __b) { return (v128_t)((__u64x2)wasm_u64x2_extend_high_u32x4(__a) * (__u64x2)wasm_u64x2_extend_high_u32x4(__b)); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_q15mulr_sat(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_q15mulr_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); } // Old intrinsic names supported to ease transitioning to the standard names. Do // not use these; they will be removed in the near future. #define __DEPRECATED_FN_ATTRS(__replacement) \ __DEFAULT_FN_ATTRS __attribute__( \ (deprecated("use " __replacement " instead", __replacement))) #define __WASM_STR(X) #X #ifdef __DEPRECATED #define __DEPRECATED_WASM_MACRO(__name, __replacement) \ _Pragma(__WASM_STR(GCC warning( \ "'" __name "' is deprecated: use '" __replacement "' instead"))) #else #define __DEPRECATED_WASM_MACRO(__name, __replacement) #endif static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load8_splat") wasm_v8x16_load_splat(const void *__mem) { return wasm_v128_load8_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load16_splat") wasm_v16x8_load_splat(const void *__mem) { return wasm_v128_load16_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load32_splat") wasm_v32x4_load_splat(const void *__mem) { return wasm_v128_load32_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_v128_load64_splat") wasm_v64x2_load_splat(const void *__mem) { return wasm_v128_load64_splat(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_load8x8") wasm_i16x8_load_8x8(const void *__mem) { return wasm_i16x8_load8x8(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_load8x8") wasm_u16x8_load_8x8(const void *__mem) { return wasm_u16x8_load8x8(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_load16x4") wasm_i32x4_load_16x4(const void 
*__mem) { return wasm_i32x4_load16x4(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_load16x4") wasm_u32x4_load_16x4(const void *__mem) { return wasm_u32x4_load16x4(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i64x2_load32x2") wasm_i64x2_load_32x2(const void *__mem) { return wasm_i64x2_load32x2(__mem); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u64x2_load32x2") wasm_u64x2_load_32x2(const void *__mem) { return wasm_u64x2_load32x2(__mem); } #define wasm_v8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7, __c8, __c9, __c10, __c11, __c12, __c13, \ __c14, __c15) \ __DEPRECATED_WASM_MACRO("wasm_v8x16_shuffle", "wasm_i8x16_shuffle") \ wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7, \ __c8, __c9, __c10, __c11, __c12, __c13, __c14, __c15) #define wasm_v16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7) \ __DEPRECATED_WASM_MACRO("wasm_v16x8_shuffle", "wasm_i16x8_shuffle") \ wasm_i16x8_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7) #define wasm_v32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) \ __DEPRECATED_WASM_MACRO("wasm_v32x4_shuffle", "wasm_i32x4_shuffle") \ wasm_i32x4_shuffle(__a, __b, __c0, __c1, __c2, __c3) #define wasm_v64x2_shuffle(__a, __b, __c0, __c1) \ __DEPRECATED_WASM_MACRO("wasm_v64x2_shuffle", "wasm_i64x2_shuffle") \ wasm_i64x2_shuffle(__a, __b, __c0, __c1) // Relaxed SIMD intrinsics #define __RELAXED_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("relaxed-simd"), \ __min_vector_width__(128))) static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_madd_f32x4((__f32x4)__a, (__f32x4)__b, (__f32x4)__c); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_nmadd_f32x4((__f32x4)__a, (__f32x4)__b, (__f32x4)__c); } static 
__inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_madd_f64x2((__f64x2)__a, (__f64x2)__b, (__f64x2)__c); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_nmadd_f64x2((__f64x2)__a, (__f64x2)__b, (__f64x2)__c); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i8x16_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i8x16( (__i8x16)__a, (__i8x16)__b, (__i8x16)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i16x8_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i16x8( (__i16x8)__a, (__i16x8)__b, (__i16x8)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i32x4( (__i32x4)__a, (__i32x4)__b, (__i32x4)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i64x2_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) { return (v128_t)__builtin_wasm_relaxed_laneselect_i64x2( (__i64x2)__a, (__i64x2)__b, (__i64x2)__m); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i8x16_relaxed_swizzle(v128_t __a, v128_t __s) { return (v128_t)__builtin_wasm_relaxed_swizzle_i8x16((__i8x16)__a, (__i8x16)__s); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_min_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f32x4_relaxed_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_max_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_f64x2_relaxed_min(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_min_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS 
wasm_f64x2_relaxed_max(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_max_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_trunc_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_s_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_u32x4_relaxed_trunc_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_u_i32x4_f32x4((__f32x4)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_trunc_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2((__f64x2)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_u32x4_relaxed_trunc_f64x2_zero(v128_t __a) { return (v128_t)__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2((__f64x2)__a); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i16x8_relaxed_q15mulr(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_q15mulr_s_i16x8((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i16x8_relaxed_dot_i8x16_i7x16(v128_t __a, v128_t __b) { return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __RELAXED_FN_ATTRS wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) { return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4( (__i8x16)__a, (__i8x16)__b, (__i32x4)__c); } // Deprecated intrinsics static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle") wasm_v8x16_swizzle(v128_t __a, v128_t __b) { return wasm_i8x16_swizzle(__a, __b); } static __inline__ bool __DEPRECATED_FN_ATTRS("wasm_v128_any_true") wasm_i8x16_any_true(v128_t __a) { return wasm_v128_any_true(__a); } static __inline__ bool __DEPRECATED_FN_ATTRS("wasm_v128_any_true") wasm_i16x8_any_true(v128_t __a) { return wasm_v128_any_true(__a); } static __inline__ bool __DEPRECATED_FN_ATTRS("wasm_v128_any_true") wasm_i32x4_any_true(v128_t __a) { return wasm_v128_any_true(__a); } 
static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_add_sat") wasm_i8x16_add_saturate(v128_t __a, v128_t __b) { return wasm_i8x16_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u8x16_add_sat") wasm_u8x16_add_saturate(v128_t __a, v128_t __b) { return wasm_u8x16_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_sub_sat") wasm_i8x16_sub_saturate(v128_t __a, v128_t __b) { return wasm_i8x16_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u8x16_sub_sat") wasm_u8x16_sub_saturate(v128_t __a, v128_t __b) { return wasm_u8x16_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_add_sat") wasm_i16x8_add_saturate(v128_t __a, v128_t __b) { return wasm_i16x8_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_add_sat") wasm_u16x8_add_saturate(v128_t __a, v128_t __b) { return wasm_u16x8_add_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_sub_sat") wasm_i16x8_sub_saturate(v128_t __a, v128_t __b) { return wasm_i16x8_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_sub_sat") wasm_u16x8_sub_saturate(v128_t __a, v128_t __b) { return wasm_u16x8_sub_sat(__a, __b); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_extend_low_i8x16") wasm_i16x8_widen_low_i8x16(v128_t __a) { return wasm_i16x8_extend_low_i8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i16x8_extend_high_i8x16") wasm_i16x8_widen_high_i8x16(v128_t __a) { return wasm_i16x8_extend_high_i8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_extend_low_u8x16") wasm_i16x8_widen_low_u8x16(v128_t __a) { return wasm_u16x8_extend_low_u8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u16x8_extend_high_u8x16") wasm_i16x8_widen_high_u8x16(v128_t __a) { return wasm_u16x8_extend_high_u8x16(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_extend_low_i16x8") 
wasm_i32x4_widen_low_i16x8(v128_t __a) { return wasm_i32x4_extend_low_i16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_extend_high_i16x8") wasm_i32x4_widen_high_i16x8(v128_t __a) { return wasm_i32x4_extend_high_i16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_extend_low_u16x8") wasm_i32x4_widen_low_u16x8(v128_t __a) { return wasm_u32x4_extend_low_u16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_extend_high_u16x8") wasm_i32x4_widen_high_u16x8(v128_t __a) { return wasm_u32x4_extend_high_u16x8(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i32x4_trunc_sat_f32x4") wasm_i32x4_trunc_saturate_f32x4(v128_t __a) { return wasm_i32x4_trunc_sat_f32x4(__a); } static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_u32x4_trunc_sat_f32x4") wasm_u32x4_trunc_saturate_f32x4(v128_t __a) { return wasm_u32x4_trunc_sat_f32x4(__a); } // Undefine helper macros #undef __DEFAULT_FN_ATTRS #undef __DEPRECATED_FN_ATTRS #endif // __WASM_SIMD128_H /*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __WBNOINVDINTRIN_H #define __WBNOINVDINTRIN_H static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("wbnoinvd"))) _wbnoinvd (void) { __builtin_ia32_wbnoinvd (); } #endif /* __WBNOINVDINTRIN_H */ /*===---- wmmintrin.h - AES intrinsics ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __WMMINTRIN_H #define __WMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include #include <__wmmintrin_aes.h> #include <__wmmintrin_pclmul.h> #endif /* __WMMINTRIN_H */ /*===--------------- x86gprintrin.h - X86 GPR intrinsics ------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86GPRINTRIN_H #define __X86GPRINTRIN_H #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__HRESET__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__UINTR__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CRC32__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PRFCHI__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RAOINT__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CMPCCXADD__) #include #endif #if defined(__i386__) #define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};" #define __RESTORE_GPRBX "mov {%%eax, %%ebx |ebx, eax};" #define __TMPGPR "eax" #else // When in 64-bit target, the 32-bit operands generate a 32-bit result, // zero-extended to a 64-bit result in the destination general-purpose, // It means "mov x %ebx" will clobber the higher 32 bits of rbx, so we // should preserve the 64-bit register rbx. 
#define __SAVE_GPRBX "mov {%%rbx, %%rax |rax, rbx};" #define __RESTORE_GPRBX "mov {%%rax, %%rbx |rbx, rax};" #define __TMPGPR "rax" #endif #define __SSC_MARK(__Tag) \ __asm__ __volatile__( __SAVE_GPRBX \ "mov {%0, %%ebx|ebx, %0}; " \ ".byte 0x64, 0x67, 0x90; " \ __RESTORE_GPRBX \ ::"i"(__Tag) \ : __TMPGPR ); #endif /* __X86GPRINTRIN_H */ /*===---- x86intrin.h - X86 intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #define __X86INTRIN_H #include #include #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__3dNOW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__PRFCHW__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__SSE4A__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__FMA4__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__XOP__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__TBM__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__LWP__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MWAITX__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__CLZERO__) #include #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPRU__) #include #endif #endif /* __X86INTRIN_H */ /*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== * * Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __XMMINTRIN_H #define __XMMINTRIN_H #if !defined(__i386__) && !defined(__x86_64__) #error "This header is only meant to be used on x86 and x64 architecture" #endif #include typedef int __v4si __attribute__((__vector_size__(16))); typedef float __v4sf __attribute__((__vector_size__(16))); typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. */ #if __STDC_HOSTED__ #include #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64))) /// Adds the 32-bit float values in the low-order bits of the operands. /// /// \headerfile /// /// This intrinsic corresponds to the VADDSS / ADDSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum /// of the lower 32 bits of both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b) { __a[0] += __b[0]; return __a; } /// Adds two 128-bit vectors of [4 x float], and returns the results of /// the addition. /// /// \headerfile /// /// This intrinsic corresponds to the VADDPS / ADDPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the sums of both /// operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a + (__v4sf)__b); } /// Subtracts the 32-bit float value in the low-order bits of the second /// operand from the corresponding value in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBSS / SUBSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits /// of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32 /// bits of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// difference of the lower 32 bits of both operands. The upper 96 bits are /// copied from the upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b) { __a[0] -= __b[0]; return __a; } /// Subtracts each of the values of the second operand from the first /// operand, both of which are 128-bit vectors of [4 x float] and returns /// the results of the subtraction. /// /// \headerfile /// /// This intrinsic corresponds to the VSUBPS / SUBPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the minuend. /// \param __b /// A 128-bit vector of [4 x float] containing the subtrahend. 
/// \returns A 128-bit vector of [4 x float] containing the differences between /// both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a - (__v4sf)__b); } /// Multiplies two 32-bit float values in the low-order bits of the /// operands. /// /// \headerfile /// /// This intrinsic corresponds to the VMULSS / MULSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// The lower 32 bits of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the product of the lower /// 32 bits of both operands. The upper 96 bits are copied from the upper 96 /// bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b) { __a[0] *= __b[0]; return __a; } /// Multiplies two 128-bit vectors of [4 x float] and returns the /// results of the multiplication. /// /// \headerfile /// /// This intrinsic corresponds to the VMULPS / MULPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the products of both /// operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a * (__v4sf)__b); } /// Divides the value in the low-order 32 bits of the first operand by /// the corresponding value in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVSS / DIVSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the dividend. The lower 32 /// bits of this operand are used in the calculation. 
/// \param __b /// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits /// of this operand are used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the quotients of the /// lower 32 bits of both operands. The upper 96 bits are copied from the /// upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b) { __a[0] /= __b[0]; return __a; } /// Divides two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VDIVPS / DIVPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the dividend. /// \param __b /// A 128-bit vector of [4 x float] containing the divisor. /// \returns A 128-bit vector of [4 x float] containing the quotients of both /// operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a / (__v4sf)__b); } /// Calculates the square root of the value stored in the low-order bits /// of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTSS / SQRTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the square root of the /// value in the low-order bits of the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); } /// Calculates the square roots of the values stored in a 128-bit vector /// of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VSQRTPS / SQRTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) { return __builtin_ia32_sqrtps((__v4sf)__a); } /// Calculates the approximate reciprocal of the value stored in the /// low-order bits of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRCPSS / RCPSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocal of the value in the low-order bits of the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { return (__m128)__builtin_ia32_rcpss((__v4sf)__a); } /// Calculates the approximate reciprocals of the values stored in a /// 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRCPPS / RCPPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocals of the values in the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { return (__m128)__builtin_ia32_rcpps((__v4sf)__a); } /// Calculates the approximate reciprocal of the square root of the value /// stored in the low-order bits of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VRSQRTSS / RSQRTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the calculation. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocal of the square root of the value in the low-order bits of the /// operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { return __builtin_ia32_rsqrtss((__v4sf)__a); } /// Calculates the approximate reciprocals of the square roots of the /// values stored in a 128-bit vector of [4 x float]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VRSQRTPS / RSQRTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the approximate /// reciprocals of the square roots of the values in the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ps(__m128 __a) { return __builtin_ia32_rsqrtps((__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands and returns the lesser value in the low-order bits of the /// vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMINSS / MINSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// minimum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } /// Compares two 128-bit vectors of [4 x float] and returns the lesser /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMINPS / MINPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the minimum values /// between both operands. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b) { return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands and returns the greater value in the low-order bits of a 128-bit /// vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXSS / MAXSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// maximum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } /// Compares two 128-bit vectors of [4 x float] and returns the greater /// of each pair of values. /// /// \headerfile /// /// This intrinsic corresponds to the VMAXPS / MAXPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the maximum values /// between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b) { return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b); } /// Performs a bitwise AND of two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VANDPS / ANDPS instructions. /// /// \param __a /// A 128-bit vector containing one of the source operands. /// \param __b /// A 128-bit vector containing one of the source operands. 
/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the /// values between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_and_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a & (__v4su)__b); } /// Performs a bitwise AND of two 128-bit vectors of [4 x float], using /// the one's complement of the values contained in the first source /// operand. /// /// \headerfile /// /// This intrinsic corresponds to the VANDNPS / ANDNPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing the first source operand. The /// one's complement of this value is used in the bitwise AND. /// \param __b /// A 128-bit vector of [4 x float] containing the second source operand. /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the /// one's complement of the first operand and the values in the second /// operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_andnot_ps(__m128 __a, __m128 __b) { return (__m128)(~(__v4su)__a & (__v4su)__b); } /// Performs a bitwise OR of two 128-bit vectors of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VORPS / ORPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the /// values between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_or_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a | (__v4su)__b); } /// Performs a bitwise exclusive OR of two 128-bit vectors of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the source operands. /// \param __b /// A 128-bit vector of [4 x float] containing one of the source operands. 
/// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR /// of the values between both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a ^ (__v4su)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands for equality and returns the result of the comparison in the /// low-order bits of a vector [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQSS / CMPEQSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] for equality. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPEQPS / CMPEQPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is less than the /// corresponding value in the second operand and returns the result of the /// comparison in the low-order bits of a vector of [4 x float]. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSS / CMPLTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are less than those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPS / CMPLTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is less than or /// equal to the corresponding value in the second operand and returns the /// result of the comparison in the low-order bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESS / CMPLESS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. 
/// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are less than or equal to those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPS / CMPLEPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is greater than /// the corresponding value in the second operand and returns the result of /// the comparison in the low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTSS / CMPLTSS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are greater than those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLTPS / CMPLTPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is greater than /// or equal to the corresponding value in the second operand and returns /// the result of the comparison in the low-order bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLESS / CMPLESS instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are greater than or equal to those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPLEPS / CMPLEPS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands for inequality and returns the result of the comparison in the /// low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQSS / CMPNEQSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] for inequality. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNEQPS / CMPNEQPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. 
/// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not less than /// the corresponding value in the second operand and returns the result of /// the comparison in the low-order bits of a vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSS / CMPNLTSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not less than those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPS / CMPNLTPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not less than /// or equal to the corresponding value in the second operand and returns /// the result of the comparison in the low-order bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESS / CMPNLESS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not less than or equal to those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPS / CMPNLEPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not greater /// than the corresponding value in the second operand and returns the /// result of the comparison in the low-order bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTSS / CMPNLTSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not greater than those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLTPS / CMPNLTPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is not greater /// than or equal to the corresponding value in the second operand and /// returns the result of the comparison in the low-order bits of a vector /// of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLESS / CMPNLESS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a), 4, 1, 2, 3); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are not greater than or equal to those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPNLEPS / CMPNLEPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is ordered with /// respect to the corresponding value in the second operand and returns the /// result of the comparison in the low-order bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDSS / CMPORDSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are ordered with respect to those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPORDPS / CMPORDPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the value in the first operand is unordered /// with respect to the corresponding value in the second operand and /// returns the result of the comparison in the low-order bits of a vector /// of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDSS / CMPUNORDSS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \param __b /// A 128-bit vector of [4 x float] containing one of the operands. The lower /// 32 bits of this operand are used in the comparison. /// \returns A 128-bit vector of [4 x float] containing the comparison results /// in the low-order bits. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b); } /// Compares each of the corresponding 32-bit float values of the /// 128-bit vectors of [4 x float] to determine if the values in the first /// operand are unordered with respect to those in the second operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCMPUNORDPS / CMPUNORDPS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the comparison results. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ps(__m128 __a, __m128 __b) { return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands for equality and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. 
/// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the /// two lower 32-bit values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is less than the second /// operand and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is less than or equal to the /// second operand and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. 
/// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is greater than the second /// operand and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the /// two lower 32-bit values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is greater than or equal to /// the second operand and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b); } /// Compares two 32-bit float values in the low-order bits of both /// operands to determine if the first operand is not equal to the second /// operand and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 1 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VCOMISS / COMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the /// two lower 32-bit values is NaN, 1 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine equality and returns /// the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// less than the second operand and returns the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// less than or equal to the second operand and returns the result of the /// comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// greater than the second operand and returns the result of the /// comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine if the first operand is /// greater than or equal to the second operand and returns the result of /// the comparison. /// /// If either of the two lower 32-bit values is NaN, 0 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 0 is returned. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b); } /// Performs an unordered comparison of two 32-bit float values using /// the low-order bits of both operands to determine inequality and returns /// the result of the comparison. /// /// If either of the two lower 32-bit values is NaN, 1 is returned. /// /// \headerfile /// /// This intrinsic corresponds to the VUCOMISS / UCOMISS instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \param __b /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the comparison. /// \returns An integer containing the comparison results. If either of the two /// lower 32-bit values is NaN, 1 is returned. static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_ss(__m128 __a, __m128 __b) { return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SI / CVTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtss_si32(__m128 __a) { return __builtin_ia32_cvtss2si((__v4sf)__a); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SI / CVTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvt_ss2si(__m128 __a) { return _mm_cvtss_si32(__a); } #ifdef __x86_64__ /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 64-bit integer. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSS2SI / CVTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 64-bit integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtss_si64(__m128 __a) { return __builtin_ia32_cvtss2si64((__v4sf)__a); } #endif /// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a); } /// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvt_ps2pi(__m128 __a) { return _mm_cvtps_pi32(__a); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer, truncating the result when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSS2SI / CVTTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a) { return __builtin_ia32_cvttss2si((__v4sf)__a); } /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 32-bit integer, truncating the result when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSS2SI / CVTTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 32-bit integer containing the converted value. static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtt_ss2si(__m128 __a) { return _mm_cvttss_si32(__a); } #ifdef __x86_64__ /// Converts a float value contained in the lower 32 bits of a vector of /// [4 x float] into a 64-bit integer, truncating the result when it is /// inexact. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTTSS2SI / CVTTSS2SI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the conversion. /// \returns A 64-bit integer containing the converted value. static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttss_si64(__m128 __a) { return __builtin_ia32_cvttss2si64((__v4sf)__a); } #endif /// Converts two low-order float values in a 128-bit vector of /// [4 x float] into a 64-bit vector of [2 x i32], truncating the result /// when it is inexact. /// /// \headerfile /// /// This intrinsic corresponds to the CVTTPS2PI / VTTPS2PI /// instructions. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttps_pi32(__m128 __a) { return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a); } /// Converts two low-order float values in a 128-bit vector of [4 x /// float] into a 64-bit vector of [2 x i32], truncating the result when it /// is inexact. 
/// /// \headerfile /// /// This intrinsic corresponds to the CVTTPS2PI instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtt_ps2pi(__m128 __a) { return _mm_cvttps_pi32(__a); } /// Converts a 32-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination vector are copied from the /// corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SS / CVTSI2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 32-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi32_ss(__m128 __a, int __b) { __a[0] = __b; return __a; } /// Converts a 32-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination are copied from the /// corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SS / CVTSI2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 32-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvt_si2ss(__m128 __a, int __b) { return _mm_cvtsi32_ss(__a, __b); } #ifdef __x86_64__ /// Converts a 64-bit signed integer value into a floating point value /// and writes it to the lower 32 bits of the destination. The remaining /// higher order elements of the destination are copied from the /// corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VCVTSI2SS / CVTSI2SS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 64-bit signed integer operand containing the value to be converted. /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi64_ss(__m128 __a, long long __b) { __a[0] = __b; return __a; } #endif /// Converts two elements of a 64-bit vector of [2 x i32] into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 64-bit vector of [2 x i32]. The elements in this vector are converted /// and written to the corresponding low-order elements in the destination. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_ps(__m128 __a, __m64 __b) { return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b); } /// Converts two elements of a 64-bit vector of [2 x i32] into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. /// \param __b /// A 64-bit vector of [2 x i32]. The elements in this vector are converted /// and written to the corresponding low-order elements in the destination. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value from the second operand. The upper 64 bits are copied /// from the upper 64 bits of the first operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvt_pi2ps(__m128 __a, __m64 __b) { return _mm_cvtpi32_ps(__a, __b); } /// Extracts a float value contained in the lower 32 bits of a vector of /// [4 x float]. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the extraction. /// \returns A 32-bit float containing the extracted value. static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtss_f32(__m128 __a) { return __a[0]; } /// Loads two packed float values from the address \a __p into the /// high-order bits of a 128-bit vector of [4 x float]. The low-order bits /// are copied from the low-order bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVHPD / MOVHPD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0] /// of the destination. /// \param __p /// A pointer to two packed float values. 
Bits [63:0] are written to bits /// [127:64] of the destination. /// \returns A 128-bit vector of [4 x float] containing the moved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadh_pi(__m128 __a, const __m64 *__p) { typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_loadh_pi_struct { __mm_loadh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u; __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); } /// Loads two packed float values from the address \a __p into the /// low-order bits of a 128-bit vector of [4 x float]. The high-order bits /// are copied from the high-order bits of the first operand. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPD / MOVLPD instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits /// [127:64] of the destination. /// \param __p /// A pointer to two packed float values. Bits [63:0] are written to bits /// [63:0] of the destination. /// \returns A 128-bit vector of [4 x float] containing the moved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadl_pi(__m128 __a, const __m64 *__p) { typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_loadl_pi_struct { __mm_loadl_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u; __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits of the vector are initialized with the single-precision /// floating-point value loaded from a specified memory location. The upper /// 96 bits are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSS / MOVSS instruction. 
/// /// \param __p /// A pointer to a 32-bit memory location containing a single-precision /// floating-point value. /// \returns An initialized 128-bit floating-point vector of [4 x float]. The /// lower 32 bits contain the value loaded from the memory location. The /// upper 96 bits are set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ss(const float *__p) { struct __mm_load_ss_struct { float __u; } __attribute__((__packed__, __may_alias__)); float __u = ((const struct __mm_load_ss_struct*)__p)->__u; return __extension__ (__m128){ __u, 0, 0, 0 }; } /// Loads a 32-bit float value and duplicates it to all four vector /// elements of a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VBROADCASTSS / MOVSS + shuffling /// instruction. /// /// \param __p /// A pointer to a float value to be loaded and duplicated. /// \returns A 128-bit vector of [4 x float] containing the loaded and /// duplicated values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load1_ps(const float *__p) { struct __mm_load1_ps_struct { float __u; } __attribute__((__packed__, __may_alias__)); float __u = ((const struct __mm_load1_ps_struct*)__p)->__u; return __extension__ (__m128){ __u, __u, __u, __u }; } #define _mm_load_ps1(p) _mm_load1_ps(p) /// Loads a 128-bit floating-point vector of [4 x float] from an aligned /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 128-bit aligned. /// \returns A 128-bit vector of [4 x float] containing the loaded values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ps(const float *__p) { return *(const __m128*)__p; } /// Loads a 128-bit floating-point vector of [4 x float] from an /// unaligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS / MOVUPS instruction. 
/// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. /// \returns A 128-bit vector of [4 x float] containing the loaded values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadu_ps(const float *__p) { struct __loadu_ps { __m128_u __v; } __attribute__((__packed__, __may_alias__)); return ((const struct __loadu_ps*)__p)->__v; } /// Loads four packed float values, in reverse order, from an aligned /// memory location to 32-bit elements in a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 128-bit aligned. /// \returns A 128-bit vector of [4 x float] containing the moved values, loaded /// in reverse order. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadr_ps(const float *__p) { __m128 __a = _mm_load_ps(__p); return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); } /// Create a 128-bit vector of [4 x float] with undefined values. /// /// \headerfile /// /// This intrinsic has no corresponding instruction. /// /// \returns A 128-bit vector of [4 x float] containing undefined values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void) { return (__m128)__builtin_ia32_undef128(); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits of the vector are initialized with the specified single-precision /// floating-point value. The upper 96 bits are set to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSS / MOVSS instruction. /// /// \param __w /// A single-precision floating-point value used to initialize the lower 32 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. The /// lower 32 bits contain the value provided in the source operand. 
The /// upper 96 bits are set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w) { return __extension__ (__m128){ __w, 0, 0, 0 }; } /// Constructs a 128-bit floating-point vector of [4 x float], with each /// of the four single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS / PERMILPS instruction. /// /// \param __w /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w) { return __extension__ (__m128){ __w, __w, __w, __w }; } /* Microsoft specific. */ /// Constructs a 128-bit floating-point vector of [4 x float], with each /// of the four single-precision floating-point vector elements set to the /// specified single-precision floating-point value. /// /// \headerfile /// /// This intrinsic corresponds to the VPERMILPS / PERMILPS instruction. /// /// \param __w /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps1(float __w) { return _mm_set1_ps(__w); } /// Constructs a 128-bit floating-point vector of [4 x float] /// initialized with the specified single-precision floating-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __z /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \param __y /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __x /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. 
/// \param __w /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps(float __z, float __y, float __x, float __w) { return __extension__ (__m128){ __w, __x, __y, __z }; } /// Constructs a 128-bit floating-point vector of [4 x float], /// initialized in reverse order with the specified 32-bit single-precision /// float-point values. /// /// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. /// /// \param __z /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \param __y /// A single-precision floating-point value used to initialize bits [63:32] /// of the result. /// \param __x /// A single-precision floating-point value used to initialize bits [95:64] /// of the result. /// \param __w /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setr_ps(float __z, float __y, float __x, float __w) { return __extension__ (__m128){ __z, __y, __x, __w }; } /// Constructs a 128-bit floating-point vector of [4 x float] initialized /// to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VXORPS / XORPS instruction. /// /// \returns An initialized 128-bit floating-point vector of [4 x float] with /// all elements set to zero. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void) { return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; } /// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VPEXTRQ / PEXTRQ instruction. /// /// \param __p /// A pointer to a 64-bit memory location. 
/// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a) { typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_storeh_pi_struct { __mm_storeh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3); } /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVLPS / MOVLPS instruction. /// /// \param __p /// A pointer to a memory location that will receive the float values. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a) { typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); struct __mm_storeh_pi_struct { __mm_storeh_pi_v2f32 __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1); } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a /// memory location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVSS / MOVSS instruction. /// /// \param __p /// A pointer to a 32-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ss(float *__p, __m128 __a) { struct __mm_store_ss_struct { float __u; } __attribute__((__packed__, __may_alias__)); ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; } /// Stores a 128-bit vector of [4 x float] to an unaligned memory /// location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVUPS / MOVUPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location does not have to be aligned. 
/// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a) { struct __storeu_ps { __m128_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__p)->__v = __a; } /// Stores a 128-bit vector of [4 x float] into an aligned memory /// location. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 16-byte aligned. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps(float *__p, __m128 __a) { *(__m128*)__p = __a; } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into /// four contiguous elements in an aligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each /// of the four contiguous elements pointed by \a __p. static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_ps(float *__p, __m128 __a) { __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0); _mm_store_ps(__p, __a); } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] into /// four contiguous elements in an aligned memory location. /// /// \headerfile /// /// This intrinsic corresponds to VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. /// \param __a /// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each /// of the four contiguous elements pointed by \a __p. 
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps1(float *__p, __m128 __a) { _mm_store1_ps(__p, __a); } /// Stores float values from a 128-bit vector of [4 x float] to an /// aligned memory location in reverse order. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVAPS / MOVAPS + shuffling /// instruction. /// /// \param __p /// A pointer to a 128-bit memory location. The address of the memory /// location has to be 128-bit aligned. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_ps(float *__p, __m128 __a) { __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0); _mm_store_ps(__p, __a); } #define _MM_HINT_ET0 7 #define _MM_HINT_ET1 6 #define _MM_HINT_T0 3 #define _MM_HINT_T1 2 #define _MM_HINT_T2 1 #define _MM_HINT_NTA 0 #ifndef _MSC_VER /* FIXME: We have to #define this because "sel" must be a constant integer, and Sema doesn't do any form of constant propagation yet. */ /// Loads one cache line of data from the specified address to a location /// closer to the processor. /// /// \headerfile /// /// \code /// void _mm_prefetch(const void *a, const int sel); /// \endcode /// /// This intrinsic corresponds to the PREFETCHNTA instruction. /// /// \param a /// A pointer to a memory location containing a cache line of data. /// \param sel /// A predefined integer constant specifying the type of prefetch /// operation: \n /// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The /// PREFETCHNTA instruction will be generated. \n /// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will /// be generated. \n /// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will /// be generated. \n /// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will /// be generated. 
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \ ((sel) >> 2) & 1, (sel) & 0x3)) #endif /// Stores a 64-bit integer in the specified aligned memory location. To /// minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the MOVNTQ instruction. /// /// \param __p /// A pointer to an aligned memory location used to store the register value. /// \param __a /// A 64-bit integer containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_stream_pi(__m64 *__p, __m64 __a) { __builtin_ia32_movntq(__p, __a); } /// Moves packed float values from a 128-bit vector of [4 x float] to a /// 128-bit aligned memory location. To minimize caching, the data is flagged /// as non-temporal (unlikely to be used again soon). /// /// \headerfile /// /// This intrinsic corresponds to the VMOVNTPS / MOVNTPS instruction. /// /// \param __p /// A pointer to a 128-bit aligned memory location that will receive the /// single-precision floating-point values. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(float *__p, __m128 __a) { __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p); } #if defined(__cplusplus) extern "C" { #endif /// Forces strong memory ordering (serialization) between store /// instructions preceding this instruction and store instructions following /// this instruction, ensuring the system completes all previous stores /// before executing subsequent stores. /// /// \headerfile /// /// This intrinsic corresponds to the SFENCE instruction. /// void _mm_sfence(void); #if defined(__cplusplus) } // extern "C" #endif /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and /// returns it, as specified by the immediate integer operand. 
/// /// \headerfile /// /// \code /// int _mm_extract_pi16(__m64 a, int n); /// \endcode /// /// This intrinsic corresponds to the VPEXTRW / PEXTRW instruction. /// /// \param a /// A 64-bit vector of [4 x i16]. /// \param n /// An immediate integer operand that determines which bits are extracted: \n /// 0: Bits [15:0] are copied to the destination. \n /// 1: Bits [31:16] are copied to the destination. \n /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset /// specified by the immediate operand \a n. /// /// \headerfile /// /// \code /// __m64 _mm_insert_pi16(__m64 a, int d, int n); /// \endcode /// /// This intrinsic corresponds to the PINSRW instruction. /// /// \param a /// A 64-bit vector of [4 x i16]. /// \param d /// An integer. The lower 16-bit value from this operand is written to the /// destination at the offset specified by operand \a n. /// \param n /// An immediate integer operant that determines which the bits to be used /// in the destination. \n /// 0: Bits [15:0] are copied to the destination. \n /// 1: Bits [31:16] are copied to the destination. \n /// 2: Bits [47:32] are copied to the destination. \n /// 3: Bits [63:48] are copied to the destination. \n /// The remaining bits in the destination are copied from the corresponding /// bits in operand \a a. /// \returns A 64-bit integer vector containing the copied packed data from the /// operands. 
#define _mm_insert_pi16(a, d, n) \ ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)) /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the greater value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMAXSW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); } /// Compares each of the corresponding packed 8-bit unsigned integer /// values of the 64-bit integer vectors, and writes the greater value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMAXUB instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); } /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the lesser value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMINSW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pi16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); } /// Compares each of the corresponding packed 8-bit unsigned integer /// values of the 64-bit integer vectors, and writes the lesser value to the /// corresponding bits in the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMINUB instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); } /// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create an 8-bit mask value. Zero-extends the value to /// 32-bit integer and writes it to the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMOVMSKB instruction. /// /// \param __a /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. static __inline__ int __DEFAULT_FN_ATTRS_MMX _mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb((__v8qi)__a); } /// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. /// /// \headerfile /// /// This intrinsic corresponds to the PMULHUW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhi_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. /// /// \headerfile /// /// \code /// __m64 _mm_shuffle_pi16(__m64 a, const int n); /// \endcode /// /// This intrinsic corresponds to the PSHUFW instruction. /// /// \param a /// A 64-bit integer vector containing the values to be shuffled. /// \param n /// An immediate value containing an 8-bit value specifying which elements to /// copy from \a a. The destinations within the 64-bit destination are /// assigned values as follows: \n /// Bits [1:0] are used to assign values to bits [15:0] in the /// destination. \n /// Bits [3:2] are used to assign values to bits [31:16] in the /// destination. \n /// Bits [5:4] are used to assign values to bits [47:32] in the /// destination. \n /// Bits [7:6] are used to assign values to bits [63:48] in the /// destination. \n /// Bit value assignments: \n /// 00: assigned from bits [15:0] of \a a. \n /// 01: assigned from bits [31:16] of \a a. \n /// 10: assigned from bits [47:32] of \a a. \n /// 11: assigned from bits [63:48] of \a a. \n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as /// specified by the most significant bit in the corresponding element in the /// second 64-bit integer vector operand. /// /// To minimize caching, the data is flagged as non-temporal /// (unlikely to be used again soon). 
/// /// \headerfile /// /// This intrinsic corresponds to the MASKMOVQ instruction. /// /// \param __d /// A 64-bit integer vector containing the values with elements to be copied. /// \param __n /// A 64-bit integer vector operand. The most significant bit from each 8-bit /// element determines whether the corresponding element in operand \a __d /// is copied. If the most significant bit of a given element is 1, the /// corresponding element in operand \a __d is copied. /// \param __p /// A pointer to a 64-bit memory location that will receive the conditionally /// copied integer values. The address of the memory location does not have /// to be aligned. static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) { __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); } /// Computes the rounded averages of the packed unsigned 8-bit integer /// values and writes the averages to the corresponding bits in the /// destination. /// /// \headerfile /// /// This intrinsic corresponds to the PAVGB instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); } /// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. /// /// \headerfile /// /// This intrinsic corresponds to the PAVGW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu16(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); } /// Subtracts the corresponding 8-bit unsigned integer values of the two /// 64-bit vector operands and computes the absolute value for each of the /// difference. Then sum of the 8 absolute differences is written to the /// bits [15:0] of the destination; the remaining bits [63:16] are cleared. /// /// \headerfile /// /// This intrinsic corresponds to the PSADBW instruction. /// /// \param __a /// A 64-bit integer vector containing one of the source operands. /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the /// sets of absolute differences between both operands. The upper bits are /// cleared. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sad_pu8(__m64 __a, __m64 __b) { return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); } #if defined(__cplusplus) extern "C" { #endif /// Returns the contents of the MXCSR register as a 32-bit unsigned /// integer value. /// /// There are several groups of macros associated with this /// intrinsic, including: ///
    ///
  • /// For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, /// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, /// _MM_EXCEPT_INEXACT. There is a convenience wrapper /// _MM_GET_EXCEPTION_STATE(). ///
  • ///
  • /// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW, /// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT. /// There is a convenience wrapper _MM_GET_EXCEPTION_MASK(). ///
  • ///
  • /// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, /// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper /// _MM_GET_ROUNDING_MODE(). ///
  • ///
  • /// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF. /// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE(). ///
  • ///
  • /// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON, /// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper /// _MM_GET_DENORMALS_ZERO_MODE(). ///
  • ///
/// /// For example, the following expression checks if an overflow exception has /// occurred: /// \code /// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW ) /// \endcode /// /// The following expression gets the current rounding mode: /// \code /// _MM_GET_ROUNDING_MODE() /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the VSTMXCSR / STMXCSR instruction. /// /// \returns A 32-bit unsigned integer containing the contents of the MXCSR /// register. unsigned int _mm_getcsr(void); /// Sets the MXCSR register with the 32-bit unsigned integer value. /// /// There are several groups of macros associated with this intrinsic, /// including: ///
    ///
  • /// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO, /// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW, /// _MM_EXCEPT_INEXACT. There is a convenience wrapper /// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros. ///
  • ///
  • /// For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW, /// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT. /// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one /// of these macros. ///
  • ///
  • /// For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, /// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper /// _MM_SET_ROUNDING_MODE(x) where x is one of these macros. ///
  • ///
  • /// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF. /// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is /// one of these macros. ///
  • ///
  • /// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON, /// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper /// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros. ///
  • ///
/// /// For example, the following expression causes subsequent floating-point /// operations to round up: /// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP) /// /// The following example sets the DAZ and FTZ flags: /// \code /// void setFlags() { /// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); /// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); /// } /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the VLDMXCSR / LDMXCSR instruction. /// /// \param __i /// A 32-bit unsigned integer value to be written to the MXCSR register. void _mm_setcsr(unsigned int __i); #if defined(__cplusplus) } // extern "C" #endif /// Selects 4 float values from the 128-bit operands of [4 x float], as /// specified by the immediate value operand. /// /// \headerfile /// /// \code /// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask); /// \endcode /// /// This intrinsic corresponds to the VSHUFPS / SHUFPS instruction. /// /// \param a /// A 128-bit vector of [4 x float]. /// \param b /// A 128-bit vector of [4 x float]. /// \param mask /// An immediate value containing an 8-bit value specifying which elements to /// copy from \a a and \a b. \n /// Bits [3:0] specify the values copied from operand \a a. \n /// Bits [7:4] specify the values copied from operand \a b. \n /// The destinations within the 128-bit destination are assigned values as /// follows: \n /// Bits [1:0] are used to assign values to bits [31:0] in the /// destination. \n /// Bits [3:2] are used to assign values to bits [63:32] in the /// destination. \n /// Bits [5:4] are used to assign values to bits [95:64] in the /// destination. \n /// Bits [7:6] are used to assign values to bits [127:96] in the /// destination. \n /// Bit value assignments: \n /// 00: Bits [31:0] copied from the specified operand. \n /// 01: Bits [63:32] copied from the specified operand. \n /// 10: Bits [95:64] copied from the specified operand. \n /// 11: Bits [127:96] copied from the specified operand. 
\n /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ (int)(mask))) /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPS / UNPCKHPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. \n /// Bits [95:64] are written to bits [31:0] of the destination. \n /// Bits [127:96] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x float]. /// Bits [95:64] are written to bits [63:32] of the destination. \n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x float] containing the interleaved values. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7); } /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPS / UNPCKLPS instruction. /// /// \param __a /// A 128-bit vector of [4 x float]. \n /// Bits [31:0] are written to bits [31:0] of the destination. \n /// Bits [63:32] are written to bits [95:64] of the destination. /// \param __b /// A 128-bit vector of [4 x float]. \n /// Bits [31:0] are written to bits [63:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x float] containing the interleaved values. 
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 32 bits are set to the lower 32 bits of the second parameter. The upper /// 96 bits are set to the upper 96 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VBLENDPS / BLENDPS / MOVSS /// instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are /// written to the upper 96 bits of the result. /// \param __b /// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are /// written to the lower 32 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b) { __a[0] = __b[0]; return __a; } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 64 bits are set to the upper 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKHPD / UNPCKHPD instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are /// written to the upper 64 bits of the result. /// \param __b /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are /// written to the lower 64 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehl_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3); } /// Constructs a 128-bit floating-point vector of [4 x float]. The lower /// 64 bits are set to the lower 64 bits of the first parameter. The upper /// 64 bits are set to the lower 64 bits of the second parameter. 
/// /// \headerfile /// /// This intrinsic corresponds to the VUNPCKLPD / UNPCKLPD instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are /// written to the lower 64 bits of the result. /// \param __b /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are /// written to the upper 64 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movelh_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5); } /// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x /// float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of [4 x i16]. The elements of the destination are copied /// from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi16_ps(__m64 __a) { __m64 __b, __c; __m128 __r; __b = _mm_setzero_si64(); __b = _mm_cmpgt_pi16(__b, __a); __c = _mm_unpackhi_pi16(__a, __b); __r = _mm_setzero_ps(); __r = _mm_cvtpi32_ps(__r, __c); __r = _mm_movelh_ps(__r, __r); __c = _mm_unpacklo_pi16(__a, __b); __r = _mm_cvtpi32_ps(__r, __c); return __r; } /// Converts a 64-bit vector of 16-bit unsigned integer values into a /// 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of 16-bit unsigned integer values. The elements of the /// destination are copied from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. 
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu16_ps(__m64 __a) { __m64 __b, __c; __m128 __r; __b = _mm_setzero_si64(); __c = _mm_unpackhi_pi16(__a, __b); __r = _mm_setzero_ps(); __r = _mm_cvtpi32_ps(__r, __c); __r = _mm_movelh_ps(__r, __r); __c = _mm_unpacklo_pi16(__a, __b); __r = _mm_cvtpi32_ps(__r, __c); return __r; } /// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] /// into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of [8 x i8]. The elements of the destination are copied /// from the corresponding lower 4 elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi8_ps(__m64 __a) { __m64 __b; __b = _mm_setzero_si64(); __b = _mm_cmpgt_pi8(__b, __a); __b = _mm_unpacklo_pi8(__a, __b); return _mm_cvtpi16_ps(__b); } /// Converts the lower four unsigned 8-bit integer values from a 64-bit /// vector of [8 x u8] into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. /// /// \param __a /// A 64-bit vector of unsigned 8-bit integer values. The elements of the /// destination are copied from the corresponding lower 4 elements in this /// operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the source operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu8_ps(__m64 __a) { __m64 __b; __b = _mm_setzero_si64(); __b = _mm_unpacklo_pi8(__a, __b); return _mm_cvtpi16_ps(__b); } /// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of [2 x i32] into a 128-bit vector of [4 x float]. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPI2PS + COMPOSITE instruction. 
/// /// \param __a /// A 64-bit vector of [2 x i32]. The lower elements of the destination are /// copied from the elements in this operand. /// \param __b /// A 64-bit vector of [2 x i32]. The upper elements of the destination are /// copied from the elements in this operand. /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// copied and converted values from the first operand. The upper 64 bits /// contain the copied and converted values from the second operand. static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) { __m128 __c; __c = _mm_setzero_ps(); __c = _mm_cvtpi32_ps(__c, __b); __c = _mm_movelh_ps(__c, __c); return _mm_cvtpi32_ps(__c, __a); } /// Converts each single-precision floating-point element of a 128-bit /// floating-point vector of [4 x float] into a 16-bit signed integer, and /// packs the results into a 64-bit integer vector of [4 x i16]. /// /// If the floating-point element is NaN or infinity, or if the /// floating-point element is greater than 0x7FFFFFFF or less than -0x8000, /// it is converted to 0x8000. Otherwise if the floating-point element is /// greater than 0x7FFF, it is converted to 0x7FFF. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI + COMPOSITE instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi16(__m128 __a) { __m64 __b, __c; __b = _mm_cvtps_pi32(__a); __a = _mm_movehl_ps(__a, __a); __c = _mm_cvtps_pi32(__a); return _mm_packs_pi32(__b, __c); } /// Converts each single-precision floating-point element of a 128-bit /// floating-point vector of [4 x float] into an 8-bit signed integer, and /// packs the results into the lower 32 bits of a 64-bit integer vector of /// [8 x i8]. The upper 32 bits of the vector are set to 0. 
/// /// If the floating-point element is NaN or infinity, or if the /// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it /// is converted to 0x80. Otherwise if the floating-point element is greater /// than 0x7F, it is converted to 0x7F. /// /// \headerfile /// /// This intrinsic corresponds to the CVTPS2PI + COMPOSITE instruction. /// /// \param __a /// 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the /// converted values and the uppper 32 bits are set to zero. static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi8(__m128 __a) { __m64 __b, __c; __b = _mm_cvtps_pi16(__a); __c = _mm_setzero_si64(); return _mm_packs_pi16(__b, __c); } /// Extracts the sign bits from each single-precision floating-point /// element of a 128-bit floating-point vector of [4 x float] and returns the /// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set /// to zero. /// /// \headerfile /// /// This intrinsic corresponds to the VMOVMSKPS / MOVMSKPS instruction. /// /// \param __a /// A 128-bit floating-point vector of [4 x float]. /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each /// single-precision floating-point element of the parameter. Bits [31:4] are /// set to zero. 
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps((__v4sf)__a); } #define _MM_ALIGN16 __attribute__((aligned(16))) #define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) #define _MM_EXCEPT_INVALID (0x0001U) #define _MM_EXCEPT_DENORM (0x0002U) #define _MM_EXCEPT_DIV_ZERO (0x0004U) #define _MM_EXCEPT_OVERFLOW (0x0008U) #define _MM_EXCEPT_UNDERFLOW (0x0010U) #define _MM_EXCEPT_INEXACT (0x0020U) #define _MM_EXCEPT_MASK (0x003fU) #define _MM_MASK_INVALID (0x0080U) #define _MM_MASK_DENORM (0x0100U) #define _MM_MASK_DIV_ZERO (0x0200U) #define _MM_MASK_OVERFLOW (0x0400U) #define _MM_MASK_UNDERFLOW (0x0800U) #define _MM_MASK_INEXACT (0x1000U) #define _MM_MASK_MASK (0x1f80U) #define _MM_ROUND_NEAREST (0x0000U) #define _MM_ROUND_DOWN (0x2000U) #define _MM_ROUND_UP (0x4000U) #define _MM_ROUND_TOWARD_ZERO (0x6000U) #define _MM_ROUND_MASK (0x6000U) #define _MM_FLUSH_ZERO_MASK (0x8000U) #define _MM_FLUSH_ZERO_ON (0x8000U) #define _MM_FLUSH_ZERO_OFF (0x0000U) #define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK) #define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK) #define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) #define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK) #define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x))) #define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x))) #define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x))) #define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x))) #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ do { \ __m128 tmp3, tmp2, tmp1, tmp0; \ tmp0 = _mm_unpacklo_ps((row0), (row1)); \ tmp2 = _mm_unpacklo_ps((row2), (row3)); \ tmp1 = _mm_unpackhi_ps((row0), (row1)); \ tmp3 = _mm_unpackhi_ps((row2), (row3)); \ (row0) = _mm_movelh_ps(tmp0, tmp2); \ (row1) = _mm_movehl_ps(tmp2, tmp0); \ (row2) = 
_mm_movelh_ps(tmp1, tmp3); \ (row3) = _mm_movehl_ps(tmp3, tmp1); \ } while (0) /* Aliases for compatibility. */ #define _m_pextrw _mm_extract_pi16 #define _m_pinsrw _mm_insert_pi16 #define _m_pmaxsw _mm_max_pi16 #define _m_pmaxub _mm_max_pu8 #define _m_pminsw _mm_min_pi16 #define _m_pminub _mm_min_pu8 #define _m_pmovmskb _mm_movemask_pi8 #define _m_pmulhuw _mm_mulhi_pu16 #define _m_pshufw _mm_shuffle_pi16 #define _m_maskmovq _mm_maskmove_si64 #define _m_pavgb _mm_avg_pu8 #define _m_pavgw _mm_avg_pu16 #define _m_psadbw _mm_sad_pu8 #define _m_ _mm_ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_MMX /* Ugly hack for backwards-compatibility (compatible with gcc) */ #if defined(__SSE2__) && !__building_module(_Builtin_intrinsics) #include #endif #endif /* __XMMINTRIN_H */ /*===---- xopintrin.h - XOP intrinsics -------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __X86INTRIN_H #error "Never use directly; include instead." #endif #ifndef __XOPINTRIN_H #define __XOPINTRIN_H #include /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_macchi_epi32(__m128i __A, __m128i 
__B, __m128i __C) { return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddw_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddw_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu8(__m128i __A) { return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddd_epu16(__m128i __A) { return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_haddq_epu16(__m128i __A) { return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_haddq_epu32(__m128i __A) { return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubw_epi8(__m128i __A) { return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubd_epi16(__m128i __A) { return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubq_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) { return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); } #define _mm_roti_epi8(A, N) \ ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))) #define _mm_roti_epi16(A, N) \ ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))) #define _mm_roti_epi32(A, N) \ ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))) #define _mm_roti_epi64(A, N) \ 
((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi8(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sha_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); } #define _mm_com_epu8(A, B, N) \ ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (N))) #define _mm_com_epu16(A, B, N) \ ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ (__v8hi)(__m128i)(B), (N))) #define _mm_com_epu32(A, B, N) \ ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (N))) #define _mm_com_epu64(A, B, N) \ ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (N))) #define _mm_com_epi8(A, B, N) \ ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (N))) #define _mm_com_epi16(A, B, N) \ ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ (__v8hi)(__m128i)(B), 
(N))) #define _mm_com_epi32(A, B, N) \ ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), (N))) #define _mm_com_epi64(A, B, N) \ ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), (N))) #define _MM_PCOMCTRL_LT 0 #define _MM_PCOMCTRL_LE 1 #define _MM_PCOMCTRL_GT 2 #define _MM_PCOMCTRL_GE 3 #define _MM_PCOMCTRL_EQ 4 #define _MM_PCOMCTRL_NEQ 5 #define _MM_PCOMCTRL_FALSE 6 #define _MM_PCOMCTRL_TRUE 7 static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu8(__m128i __A, __m128i __B) { return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); } static 
__inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu16(__m128i __A, __m128i __B) { return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu32(__m128i __A, __m128i __B) { return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ 
__m128i __DEFAULT_FN_ATTRS _mm_comle_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epu64(__m128i __A, __m128i __B) { return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_comtrue_epi8(__m128i __A, __m128i __B) { return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi16(__m128i __A, __m128i __B) { return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi32(__m128i 
__A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi32(__m128i __A, __m128i __B) { return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comlt_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comle_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comgt_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comge_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comeq_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comneq_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comfalse_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_comtrue_epi64(__m128i __A, __m128i __B) { return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); } #define _mm_permute2_pd(X, Y, C, I) \ ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ (__v2di)(__m128i)(C), (I))) #define _mm256_permute2_pd(X, Y, C, I) \ ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), \ (__v4di)(__m256i)(C), (I))) #define _mm_permute2_ps(X, Y, C, I) \ ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ (__v4si)(__m128i)(C), (I))) #define _mm256_permute2_ps(X, Y, C, I) \ 
((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ (__v8si)(__m256i)(C), (I))) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ss(__m128 __A) { return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_frcz_sd(__m128d __A) { return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ps(__m128 __A) { return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_frcz_pd(__m128d __A) { return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_frcz_ps(__m256 __A) { return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_frcz_pd(__m256d __A) { return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); } #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS256 #endif /* __XOPINTRIN_H */ /*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVECINTRIN_H #define __XSAVECINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsavec"))) /// Performs a full or partial save of processor state to the memory at /// \a __p. The exact state saved depends on the 64-bit mask \a __m and /// processor control register \c XCR0. 
/// /// \code{.operation} /// mask[62:0] := __m[62:0] AND XCR0[62:0] /// FOR i := 0 TO 62 /// IF mask[i] == 1 /// CASE (i) OF /// 0: save X87 FPU state /// 1: save SSE state /// DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i] /// FI /// ENDFOR /// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c XSAVEC instruction. /// /// \param __p /// Pointer to the save area; must be 64-byte aligned. /// \param __m /// A 64-bit mask indicating what state should be saved. static __inline__ void __DEFAULT_FN_ATTRS _xsavec(void *__p, unsigned long long __m) { __builtin_ia32_xsavec(__p, __m); } #ifdef __x86_64__ /// Performs a full or partial save of processor state to the memory at /// \a __p. The exact state saved depends on the 64-bit mask \a __m and /// processor control register \c XCR0. /// /// \code{.operation} /// mask[62:0] := __m[62:0] AND XCR0[62:0] /// FOR i := 0 TO 62 /// IF mask[i] == 1 /// CASE (i) OF /// 0: save X87 FPU state /// 1: save SSE state /// DEFAULT: __p.Ext_Save_Area[i] := ProcessorState[i] /// FI /// ENDFOR /// __p.Header.XSTATE_BV[62:0] := INIT_FUNCTION(mask[62:0]) /// \endcode /// /// \headerfile /// /// This intrinsic corresponds to the \c XSAVEC64 instruction. /// /// \param __p /// Pointer to the save area; must be 64-byte aligned. /// \param __m /// A 64-bit mask indicating what state should be saved. static __inline__ void __DEFAULT_FN_ATTRS _xsavec64(void *__p, unsigned long long __m) { __builtin_ia32_xsavec64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif /*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVEINTRIN_H #define __XSAVEINTRIN_H #ifdef _MSC_VER #define _XCR_XFEATURE_ENABLED_MASK 0 #endif /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsave"))) static __inline__ void __DEFAULT_FN_ATTRS _xsave(void *__p, unsigned long long __m) { __builtin_ia32_xsave(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstor(void *__p, unsigned long long __m) { __builtin_ia32_xrstor(__p, __m); } #ifndef _MSC_VER #define _xgetbv(A) __builtin_ia32_xgetbv((long long)(A)) #define _xsetbv(A, B) __builtin_ia32_xsetbv((unsigned int)(A), (unsigned long long)(B)) #else #ifdef __cplusplus extern "C" { #endif unsigned __int64 __cdecl _xgetbv(unsigned int); void __cdecl _xsetbv(unsigned int, unsigned __int64); #ifdef __cplusplus } #endif #endif /* _MSC_VER */ #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsave64(void *__p, unsigned long long __m) { __builtin_ia32_xsave64(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstor64(void *__p, unsigned long long __m) { __builtin_ia32_xrstor64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif /*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVEOPTINTRIN_H #define __XSAVEOPTINTRIN_H /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsaveopt"))) static __inline__ void __DEFAULT_FN_ATTRS _xsaveopt(void *__p, unsigned long long __m) { __builtin_ia32_xsaveopt(__p, __m); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsaveopt64(void *__p, unsigned long long __m) { __builtin_ia32_xsaveopt64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif /*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XSAVESINTRIN_H #define __XSAVESINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsaves"))) static __inline__ void __DEFAULT_FN_ATTRS _xsaves(void *__p, unsigned long long __m) { __builtin_ia32_xsaves(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xrstors(void *__p, unsigned long long __m) { __builtin_ia32_xrstors(__p, __m); } #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xrstors64(void *__p, unsigned long long __m) { __builtin_ia32_xrstors64(__p, __m); } static __inline__ void __DEFAULT_FN_ATTRS _xsaves64(void *__p, unsigned long long __m) { __builtin_ia32_xsaves64(__p, __m); } #endif #undef __DEFAULT_FN_ATTRS #endif /*===---- xtestintrin.h - XTEST intrinsic ----------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use directly; include instead." #endif #ifndef __XTESTINTRIN_H #define __XTESTINTRIN_H /* xtest returns non-zero if the instruction is executed within an RTM or active * HLE region. */ /* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is * supported. */ static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rtm"))) _xtest(void) { return __builtin_ia32_xtest(); } #endif basic_string::_M_create:???/-nostdinc-nostdinc++-U-D-I-isystem/builtins-Qunused-arguments-Wno-unknown-warning-option from project Generated on .basic_string::substrbasic_string::basic_stringError reading the directory: .hhDelayed %] Processing .qdoc-xc++-include.hCould not find commands for %Y-%b-%d revision /otherIndexbuild_pathboutput patho:[:]p::edata pathda/builtins/__clang_cuda_math.h/builtins/__clang_hip_cmath.h/builtins/__clang_hip_math.h/builtins/__wmmintrin_aes.h/builtins/adxintrin.h/builtins/ammintrin.h/builtins/amxcomplexintrin.h/builtins/amxfp16intrin.h/builtins/amxintrin.h/builtins/arm64intr.h/builtins/arm_acle.h/builtins/arm_bf16.h/builtins/arm_cde.h/builtins/arm_cmse.h/builtins/arm_fp16.h/builtins/arm_mve.h/builtins/arm_sve.h/builtins/armintr.h/builtins/avx2intrin.h/builtins/avxifmaintrin.h/builtins/avxintrin.h/builtins/avxvnniint8intrin.h/builtins/avxvnniintrin.h/builtins/bmi2intrin.h/builtins/bmiintrin.h/builtins/builtins.h/builtins/cet.h/builtins/cetintrin.h/builtins/cldemoteintrin.h/builtins/clflushoptintrin.h/builtins/clwbintrin.h/builtins/clzerointrin.h/builtins/cmpccxaddintrin.h/builtins/cpuid.h/builtins/crc32intrin.h/builtins/emmintrin.h/builtins/enqcmdintrin.h/builtins/f16cintrin.h/builtins/float.h/builtins/fma4intrin.h/builtins/fmaintrin.h/builtins/fxsrintrin.h/builtins/gfniintrin.h/builtins/hexagon_protos.h/builtins/hexagon_types.h/builtins/hresetintrin.
h/builtins/htmintrin.h/builtins/htmxlintrin.h/builtins/ia32intrin.h/builtins/immintrin.h/builtins/intrin.h/builtins/inttypes.h/builtins/invpcidintrin.h/builtins/iso646.h/builtins/keylockerintrin.h/builtins/larchintrin.h/builtins/limits.h/builtins/lwpintrin.h/builtins/lzcntintrin.h/builtins/mm3dnow.h/builtins/mm_malloc.h/builtins/mmintrin.h/builtins/movdirintrin.h/builtins/msa.h/builtins/mwaitxintrin.h/builtins/nmmintrin.h/builtins/opencl-c-base.h/builtins/opencl-c.h/builtins/pconfigintrin.h/builtins/pkuintrin.h/builtins/pmmintrin.h/builtins/popcntintrin.h/builtins/prfchiintrin.h/builtins/prfchwintrin.h/builtins/ptwriteintrin.h/builtins/raointintrin.h/builtins/rdpruintrin.h/builtins/rdseedintrin.h/builtins/riscv_ntlh.h/builtins/riscv_vector.h/builtins/rtmintrin.h/builtins/s390intrin.h/builtins/serializeintrin.h/builtins/sgxintrin.h/builtins/sha512intrin.h/builtins/shaintrin.h/builtins/sifive_vector.h/builtins/sm3intrin.h/builtins/sm4intrin.h/builtins/smmintrin.h/builtins/stdalign.h/builtins/stdarg.h/builtins/stdatomic.h/builtins/stdbool.h/builtins/stddef.h/builtins/stdint.h/builtins/stdnoreturn.h/builtins/tbmintrin.h/builtins/tgmath.h/builtins/tmmintrin.h/builtins/tsxldtrkintrin.h/builtins/uintrintrin.h/builtins/unwind.h/builtins/vadefs.h/builtins/vaesintrin.h/builtins/varargs.h/builtins/velintrin.h/builtins/velintrin_approx.h/builtins/velintrin_gen.h/builtins/vpclmulqdqintrin.h/builtins/waitpkgintrin.h/builtins/wasm_simd128.h/builtins/wbnoinvdintrin.h/builtins/wmmintrin.h/builtins/x86gprintrin.h/builtins/x86intrin.h/builtins/xmmintrin.h/builtins/xopintrin.h/builtins/xsavecintrin.h/builtins/xsaveintrin.h/builtins/xsaveoptintrin.h/builtins/xsavesintrin.h/builtins/xtestintrin.hAUHATUSHHHHHtHHHHtHH)LHI9tfH'HHHHHHHHHstatic (, = ) constAWAVAUATIUHSHL7VdH%(H$1Ll$PILT$PHD$XH|$@HT$HH|$D$@LD$6u5H$dH+%( D$6H[]A\A]A^A_H$f11HL$8H$LH$H HL$P1H$HDŽ$Ƅ$D$XD$xHDŽ$DŽ$HD$p)D$`ED$78HWHE0HL0AFƒAFD$7USERу@uHfDHH"H;HHP I<$MHHHH9uHE0M4$HH8GvHHWLH$H$H$LIH@ I9G IG L$HLH$H$LHH@ H9G(HG 
H$H$HD$H9tH$HpH$H$H9tH$Hp1HUxI4$LLIFH9t[IFIƘHpM9.MuMtGIu[L]A\L)A]A^fDH H9KmfDIƘM9[]A\A]A^ATIUHSHHtHtL)HwHuH5H=IVI6H=IHH@IH3}8 ECLI HL9t$zL|$HD$(H|$HH%Ht$HfH$HDŽ$H$H)$PH$L$I9H?HLL)HHHH?)HcHHMLILL9Hl$0DL$IELL9H$IEH$IEH$MmIEAEH$LcLsH$HKL9LHFHxHHL$H|$H|$HL$TyHkH{H9$H9'HUHE HMLeHU H'H}HEHEHEHOfD H5H=HSH3H=IHH@MMV1A~8DAvCLHyDHUH90HD$HKH$HH90LH|$H4LH$pH$H$L9_fo$H $)$UfDfo$)$f.fo$)$af.fo$)$f.fo$L)$jfo$H)$LI$ HH@0H9!LfDH5H=IVI6H=HHH@LM/A|$8`At$CHHg H5H=IVI6H=Hfo$ )$ f.fo$)$HSH;$uH;H$Ht uHLH HI9uL$H$IvJT:.qdo>H$fHnH$HPH0)$8fHD$)$HDŽ$H$H$PLL$ H|$0H$HL$@ZYL9t-I?IGH9XIGI HpI9uH$0HtH$@H)@ $@H$H;|$8tH$HpH$L$L9I}xIH9tIHpI}XIEhH9t IEhHpI]HM}@L9t+I?IGH9PIGI HpI9uM}@MtIuPLL)I} IE0H9t IE0HpI}IEH9IEIŘHpI92H$HtH$H)H$H$H9tH$HpD$0dI L9t$pCH$H|$H$ H$H$H|$(H|$HHPH$H;$tH$Hp1fDHH=H=HL)H9}HH9ЅIHI>HL9M H9 I.IVMfH$IvH H$H$HDŽ$H$L9H$HpIE IL9l$0IIfI L9fDIŘL90I L9fDHH;*3H5H\$`HH$HHIE IL9l$0KNH$H$LHH H$H$HLp 1H$ H$HH$HHHPHH$HH$0HD$@Hfo$fo$Ht$ H$xfo$H$pfo$)$P)$`)$)$0H$PH$`H9tH$`HpHCHH HL$hHt$ E11$*H$fDŽ$PH$0HAf$*$H$HS(L$H$HDŽ$H9wH)IHHƄ$H$HD$HD$HDŽ$xH$pHCBt(LCH H$xH$pI<0H$H;D$H$H9$Mt!H<0IHLHH$pH$HH$xH$xH9H$pLPH;T$8H$I9D/H$pL$xDH$xHH)I9ID5H$H$pH;D$H$H9$Mt#H<0IHt$LH$pH$H|$PLAIL$H$xH$8H$XH$HH$`H$PH$hL$0H$xL$8HIOH$H$H$MoH$ H$L$H$L$L$HL$DŽ$8HDŽ$@HDŽ$XDŽ$hHDŽ$pHDŽ$DŽ$HDŽ$HDŽ$(PSt$PL$L$ASARL$L$AQMAPHL$@QH|$`L$XH$H$xH$H$H@H;<$tH$HpH$H$H|$PH$8H$@H$H$HH|$L$HL)H H$H|$MM H9=H$I9LH5 HH$L$H$BD* H$H|$PHH$GHx1H$H9tH$Hp$H\$PH$xH$pHHt$@Ƅ$ HHH|$ H$pH;|$tH$HpILPHH$0H$@H9H$@HpE11H$H$H$H)HH$HHHH$L$L$L$IHHD$M+Ht$H;t:yZL)Lc HH~RILILHLLkM9IFHuM)I9}HI9~DEuLHLL$L$Ht$HH9$HD$H|$ HHUHMPH$H$fH$fo$0)$0H$H$@IH$HT$H$HDŽ$@)$H9IxIH9tIHpIXIGhH9t IGhHpI@MoHHI9t0fDH;HCH9HCH HpL9uI@Ht IwPH)I IG0H9t IG0HpI?IGH9IGIǘHpL;|$,H$tH$H$H)H|$ H$HH$H9$PH\$`H5HH$H$HH5H$w3H5LH|$P1H$LH$HH$H;|$BKH|$PH|$ H$pH;|$tH$HpILPHH$0H$@H9tH$@HpH$H;|$8tH$HpH|$xH I9fDIǘL9|$n=L;t$? 
MtI`LLI>H$IFH$Aom)$UI.MfH$IFL$L$LiH$HHHH|$hE11H$1zcH|$PH5H$H$Hu H;D$: HPHH9HHHPH9H HHHHHHPHP HHHXH$HPH)HHIH@@IHXEHS H9HCHCHS HCHHC H H{HHHCH IHCH{H9uHSHtHIHH{HSHSHCL$H$L)HHD$XY$H$H$pHH$8L$8HL)H H$0L$@MOL9 H$@I9 LH5HH$0L$8BD8LCHH$8H$0HK@M<0L9 H$@I9c M L$8B8L$8HL)H\H$0MOL9 H$@I9 LH$0L$8BD8$I>HCHCHCHHC H[HH} H$H;$o$HU0H9 HU0HE E(H H$H$H$HDŽ$H$H;<$tH$HpH\$PH5HH|$@HH$H;<$tH$HpH$H$L$H$ LH$LxHL$I9Ƅ$ HDŽ$LGAwH$HL$HH)L9H$J,>H9H$ H9EMt!H<0ILHH$H$(H$HH)H$H$L}H9zH$ I9f(.hH$L$LH|$@D(H$H9tH$ HpH$H$H$L$HD$H$H;$HH$HPH$H $H HDŽ$Ƅ$HPH$H$M1LH$H$HA1HLH H|$ 1HfDŽ$PH$0H HL$`H\$`H5HHH|$(H0I9MMtILHHEHUHE!H58H\$ Ht$@fDŽ$PL$0H$$foHt$1HH$L$0H$fDŽ$P$L$H$HQDŽ$L5Ht$ H|$@H$L$0L$H$fDŽ$PH$8L$fDII9GL$D$MEcIG1H@HxHpH|8.sAGAIGAGL$MoM;/tfDI}$IEH$Ht HxLIGLhHPIWMt!=AEPAUL$fAoMoM;/uL$f)$M=YAEPAUIELP= AE PAU IELPHEHMLeHE Hmqf8.hf8.H H{HSHT$PH|$@HH$hoPPA 1LLL$H LL$H$HtHH4$H$H} HU(,HL$H$M1H$pQH$HA1H L$H$L$H$|H$LFH5,Ht$@H|$ fDŽ$PH$0HL$hH|$ AH5HH$0HD$HH|$HH$HHD$`HH$HHH$1H$p;E1fo$xHxw $-HL$hH5HH|$ H$0HD$H.H$pH<0I%LHH$0:HD$H$phEIIELP=tgAE PAU wHHtI|$htCHLI\$H[]A\A]'wHHrfHID$hLm0LHH8HGHH@&w>G&wRfLo GtSII}G&vHAD$p>f.HHuAuIAEu¸AVfAUATIUHoSHHĀdH%(HD$x1H|$H,$HD$D$D$HD$HD$ D$(D$8D$HD$XI$`HD$hD$pL`PID$H"IIMtcAELd$Pʃ<P>IFHT$>wLHt H|$h/HLLd$HC1҃HHH|$HtCHD$`H\$@HhH9vH;HH9wH|$HD$ H4HD$xdH+%(AH[]A\A]A^f.LLHID$HIIMHM0I>G A9FtAFHLMnf'HHfDLHD$hMu0LHH8HGHH@&w=G&w?f.Lw Gt8II>G&vHٸD$pTHHuAuIAFuԸAWAVE1AUATIUHSH(DnEuf.IE9HLHHuHHN| KL;ItAGI\$ƃHʃv@'w,HHs?IBID$AG>wLHt I|$hLLII\$E9=HH@8HL8HXLA#1E1HLHAGEA%rLsHDHD$#LMl$IL;t$AI6HtFMl$AxAvA'w+HLs?HBID$F>wHHt$Ht$HmI|$haHHt$ID$hLF0LHH8HGHH@&GLD$&LG GII8G&vHt$Ht$HLLeH([]A\A]A^A_fDHH@ +AW@AGH@HT(ItHAGHfL,փ<ЃwSAWA@HHt(It7H<uHH4HtLE tdAG<1fAGEoAAAI1MGLJD+HHHLHt2HLHtHLHH(1[]A\A]A^A_fALHHR#tBt=YM(I HD3 M.I HD31M HHH:HL4M9ufDIM9toIuLuMAGAf<uE1I AII(1M(JD+HHHHfDAGfLID$hIO0HHH8HGHH@&GHL$&DHO GHH9G&vHDMI H3Ht$Ht$LD$HH3At9AD$p.HL$HHht!AD$pWIA@uHAuոA<1E1HHxPPfD1 A4LHHzu*MtI1MHD3HHHAAVAUATUSHĀdH%(HD$x1>HIHHcH>H|$ HHT$0fo\$ HD$ Hl$8HT$Lt$@)$HT$H9t:l8H0LAĄt 
7H0LAĄ1HT$4H$HH$H9uL9uHD$xdH+%(=H[]A\A]A^HD$xdH+%(:H[]A\A]A^H|$ HHT$0foL$ HD$ H\$8HT$Lt$@) $HT$H9t>K6H0LAĄeHT$3H$HH$H9uI9u,HD$xdH+%(':H[]A\A]A^HD$xdH+%(9H[]A\A]A^HD$xdH+%(9H[]A\A]A^Hv HD$xdH+%(:HHL[]A\A]A^HvHD$xdH+%(9HHL[]A\A]A^HvHD$xdH+%(8HHL[]A\A]A^HvXHD$xdH+%(8HHL[]A\A]A^HvHD$xdH+%(;HHL[]A\A]A^HvHD$xdH+%(8HHL[]A\A]A^HvHD$xdH+%(8HHL[]A\A]A^HvtlHD$xdH+%(7HHL[]A\A]A^HFH_6L`II9uK6IL9>6I4$LuE1HvtHD$xdH+%(G:HHL[]A\A]A^HvtHD$xdH+%(D7HHL[]A\A]A^HvwHD$xdH+%(?7HHL[]A\A]A^Hv;HD$xdH+%(7HHL[]A\A]A^HvHD$xdH+%(6HHL[]A\A]A^HvHD$xdH+%(h6HHL[]A\A]A^HvHD$xdH+%(66HHL[]A\A]A^HvKHD$xdH+%(9HHL[]A\A]A^HvHD$xdH+%(}5HHL[]A\A]A^HvHD$xdH+%(8HHL[]A\A]A^HvHD$xdH+%(4HHL[]A\A]A^Hv[HD$xdH+%(*8HHL[]A\A]A^HvHD$xdH+%(4HHL[]A\A]A^HvHD$xdH+%($4HHL[]A\A]A^HvHD$xdH+%(4HHL[]A\A]A^HvkHD$xdH+%(3HHL[]A\A]A^Hv/HD$xdH+%(3HHL[]A\A]A^HvHD$xdH+%(>3HHL[]A\A]A^HvHD$xdH+%(*3HHL[]A\A]A^Hv{HD$xdH+%(2HHL[]A\A]A^Hv?HD$xdH+%(2HHL[]A\A]A^HvHD$xdH+%(2HHL[]A\A]A^HvHD$xdH+%(4HHL[]A\A]A^HD$xdH+%(2H[]A\A]A^HvfHD$xdH+%(J4HHL[]A\A]A^H|$ HHT$0fo|$ HD$ H\$8HT$Lt$@)<$HT$H9t>-H0LAĄHT$d*H$HH$H9uI9uYH|$ HHT$0foD$ HD$ H\$8HT$Lt$@)$HT$H9t>-H0LAĄHT$*H$HH$H9uL9uHD$xdH+%(1H[]A\A]A^HD$xdH+%(1H[]A\A]A^HD$xdH+%(-1H[]A\A]A^HD$xdH+%(2H[]A\A]A^H|$ HHT$0fo\$ HD$ H\$8HT$Lt$@)$HT$H9t>+H0LAĄHT$(H$HH$H9uI9uHLHHD$xdH+%(/HHL[]A\A]A^HvHt Hu@LHLHHD$xdH+%(m/HuHHL[]A\A]A^HvYHD$xdH+%(]/HHL[]A\A]A^HD$xdH+%(/H[]A\A]A^HvHD$xdH+%(0HHL[]A\A]A^H|$ HHT$0fol$ HD$ H\$8HT$Lt$@),$HT$H9t>)H0LAĄ$HT$~&H$HH$H9uI9uHvHt^H\$`D$`H?E1H\$Pfn}H HD$`AufpfH~LL$`H|$`XZH9tHD$`HpHD$xdH+%(-HHL[]A\A]A^HD$xdH+%(-H[]A\A]A^HD$xdH+%(L.H[]A\A]A^HD$xdH+%(U-H[]A\A]A^H|$ HHT$0foT$ HD$ H\$8HT$Lt$@)$HT$H9t>(H0LAĄHT$$H$HH$H9uI9uhHD$xdH+%(,HHL[]A\A]A^HD$xdH+%(,H[]A\A]A^HFHH0HPOHD$xdH+%( ,HHL[]A\A]A^HD$xdH+%(-H[]A\A]A^HD$xdH+%(],H[]A\A]A^HvHD$xdH+%(-HHL[]A\A]A^H|$ HHT$0fot$ HD$ H\$8HT$Lt$@)4$HT$H9t>&H0LAĄHT$$H$HH$H9uI9uHD$xdH+%(+H[]A\A]A^HF0H0HPHD$xdH+%()+HHL[]A\A]A^HF 
H0HPHD$xdH+%(v,HHL[]A\A]A^dHD$xdH+%(*HHL[]A\A]A^HD$xdH+%(o*H[]A\A]A^HD$xdH+%(*H[]A\A]A^HvHD$xdH+%(+HHL[]A\A]A^HD$xdH+%(,H[]A\A]A^HFHH0HPrHD$xdH+%(4*HHL[]A\A]A^HD$xdH+%(x)H[]A\A]A^HD$xdH+%(N)H[]A\A]A^HD$xdH+%((HHL[]A\A]A^HD$xdH+%(*H[]A\A]A^HD$xdH+%((H[]A\A]A^HvjHD$xdH+%(S*HHL[]A\A]A^HD$xdH+%((H[]A\A]A^HD$xdH+%((H[]A\A]A^HD$xdH+%(:(H[]A\A]A^HD$xdH+%()H[]A\A]A^HD$xdH+%((H[]A\A]A^HvuHD$xdH+%(~'HHL[]A\A]A^HD$xdH+%(5'HvH[]A\A]A^Hv HU( E%uH%ET%H]XHLdx H L9|%HLufHD$xdH+%('H[]A\A]A^HvzHD$xdH+%(7'HHL[]A\A]A^HD$xdH+%(&H[]A\A]A^HD$xdH+%( (H[]A\A]A^HD$xdH+%(o&HHL[]A\A]A^HD$xdH+%('HvH[]A\A]A^H|$ HHT$0foL$ HD$ H\$8HT$Lt$@) $HT$H9t> H0LAĄHT$}H$HH$H9uI9uHD$xdH+%('H[]A\A]A^HD$xdH+%(%H[]A\A]A^HD$xdH+%(&H[]A\A]A^H|$ HHT$0fod$ HD$ H\$8HT$Lt$@)$$HT$H9t>H0LAĄ HT$H$HH$H9uI9uHD$xdH+%($H[]A\A]A^H|$ HHT$0foD$ HD$ H\$8HT$Lt$@)$HT$H9t>}H0LAĄcHT$=H$HH$H9uI9u*HD$xdH+%(#H[]A\A]A^HD$xdH+%(x#H[]A\A]A^HD$xdH+%(%H[]A\A]A^F!H^Ld fHI9!HLH0HPuHvHD$xdH+%($HHL[]A\A]A^HD$xdH+%(%H[]A\A]A^HFH0HP_HD$xdH+%(#HHL[]A\A]A^HD$xdH+%(#H[]A\A]A^HvHD$xdH+%(B#HHL[]A\A]A^H|$ HHT$0fod$ HD$ H\$8HT$Lt$@)$$HT$H9t>QH0LAĄ*HT$H$HH$H9uL9uHD$xdH+%(i!H[]A\A]A^HFH0HPHD$xdH+%(1!HHL[]A\A]A^HFH0HPHD$xdH+%(M!HHL[]A\A]A^HF H0HPHD$xdH+%("HHL[]A\A]A^HvRHD$xdH+%("HHL[]A\A]A^HD$xdH+%(K#H[]A\A]A^HvHUHEPHPHXHLd0H I9HLuHD$xdH+%(!H[]A\A]A^HviHD$xdH+%( HHL[]A\A]A^HD$xdH+%(!H[]A\A]A^HFH0HPHD$xdH+%(HHL[]A\A]A^HD$xdH+%( H[]A\A]A^HvHD$xdH+%( HHL[]A\A]A^HD$xdH+%(!H[]A\A]A^HFH0HP8HD$xdH+%(HHL[]A\A]A^H|$ HHT$0fo|$ HD$ H\$8HT$Lt$@)<$HT$H9t>H0LAĄdHT$H$HH$H9uI9u+HvzHD$xdH+%(HHL[]A\A]A^Hv>HD$xdH+%(HHL[]A\A]A^HD$xdH+%(:H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HFH0HPHD$xdH+%(*HHL[]A\A]A^HD$xdH+%(EH[]A\A]A^Hv9HD$xdH+%(AHHL[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(}H[]A\A]A^H|$ HHT$0fot$ HD$ H\$8HT$Lt$@)4$HT$H9t>vH0LAĄHT$H$HH$H9uL9uHv1HD$xdH+%(HHL[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHUE M8EDH]HHLdh H I9HLu=DHD$xdH+%(H[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(H[]A\A]A^H|$ HHT$0foD$ HD$ H\$8HT$Lt$@)$HT$H9t> 
H0LAĄHT$H$HH$H9uI9uHvHD$xdH+%(HHL[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(GH[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(EHHL[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(KH[]A\A]A^HD$xdH+%(H[]A\A]A^HvCHD$xdH+%(}HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(@HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HFHH0HP,HD$xdH+%(HHL[]A\A]A^HD$xdH+%(5H[]A\A]A^HvHD$xdH+%(-HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HFH0HPcHD$xdH+%(HHL[]A\A]A^H|$ HHT$0foT$ HD$ H\$8HT$Lt$@)$HT$H9t>H0LAĄHT$H$HH$H9uI9uVHvHD$xdH+%(HHL[]A\A]A^HviHD$xdH+%({HHL[]A\A]A^HF0H~HD$xdH+%(`H[]A\A]A^HF'HHhH\H9HuLAĄqHH9uSHD$xdH+%(QH[]A\A]A^HD$xdH+%(qH[]A\A]A^HvXHD$xdH+%(yHHL[]A\A]A^HD$xdH+%(H[]A\A]A^H|$ Ht$(LHt$0LAĄHD$xdH+%(7H[]A\A]A^HD$xdH+%(5H[]A\A]A^HvqHD$xdH+%(HHL[]A\A]A^HD$xdH+%(GH[]A\A]A^HD$xdH+%(HHL[]A\A]A^HD$xdH+%("H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(uH[]A\A]A^HFH0HPKHD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HD$xdH+%(<H[]A\A]A^HD$xdH+%(H[]A\A]A^Hv?HD$xdH+%(tHHL[]A\A]A^Hv HD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HvfHD$xdH+%(HHL[]A\A]A^HD$xdH+%(PH[]A\A]A^HFH0HPHD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(iH[]A\A]A^HFH0HP5HD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvsHD$xdH+%( HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHD$xdH+%(=HHL[]A\A]A^HD$xdH+%(HvH[]A\A]A^H|$ HHT$0fo\$ HD$ H\$8HT$Lt$@)$HT$H9t>"H0LAĄHT$'H$HH$H9uI9uHD$xdH+%(H[]A\A]A^HvHD$xdH+%(!HHL[]A\A]A^HD$xdH+%(H[]A\A]A^HvHtcH\$`D$`H?HH\$Xfnm AHD$`AufpfH~LL$`H|$`AXAYH9tHD$`HpHD$xdH+%( HHL[]A\A]A^HFH0HPHD$xdH+%(&HHL[]A\A]A^HD$xdH+%(/ H[]A\A]A^HD$xdH+%(B H[]A\A]A^HvAĄ~EH]HlH9uHH9H3>uHvLu6Hv(HU0!HD$xdH+%(8 HHL[]A\A]A^H|$ HHT$0fol$ HD$ H\$8HT$Lt$@),$HT$H9t>H0LAĄMHT$H$HH$H9uL9uHD$xdH+%( H[]A\A]A^HD$xdH+%( H[]A\A]A^H|$ HHT$0foL$ HD$ H\$8HT$Lt$@) $HT$H9t>H0LAĄHT$CH$HH$H9uI9uHHD$xdH+%(7 H[]A\A]A^HD$xdH+%( H[]A\A]A^HD$xdH+%( H[]A\A]A^HD$xdH+%( 
H[]A\A]A^@HHH$HT$DH;HH$HT$-DHHH$HT$.DHrHH$HT$DHHH$HT$GDH=HH$HT$DHHH$HT$DHfHH$HT$DHYHH$HT$DHHH$HT$DHHH$HT$hDHHH$HT$DHvHH$HT$DH HH$HT$DHHH$HT$DHHH$HT$DH]HH$HT$DHHH$HT$nDHPHH$HT$DHPHH$HT$HHH^H`HTHH?HHHHHrHHvHHH#HH}H'HH$HT$gHH$HT$HH$HT$HH$HT$HH$HT$HH$HT$HH$HT$HH$HT$HH$HT$HH$HT$uHH$HT$HH$HT$HH$HT$ HH$HT$HH$HT$HH$HT$LHH$HT$eHH$HT$|HH$HT$lHH$HT$HD$xdH+%(HHL[]A\A]A^HHD$xdH+%(^HHL[]A\A]A^HD$xdH+%(HHL[]A\A]A^HD$xdH+%(HHL[]A\A]A^HD$xdH+%(HHL[]A\A]A^HD$xdH+%(BHHL[]A\A]A^H^HHZTraverseStmt: Stack overflow, giving up traversalAWAVAUATUSHdH%(H$GG='IHHtl<{fH|$pL$HD$h)$)$)$)$HD$pHD$xfLHDŽ$DŽ$HDŽ$HDŽ$)$)$)$)$Ll$pMfo$H$fo$fo|$pfo$L$H$fo$)d$ fo$fo$)l$fo$fo$)$)$)$)$)$AowMoAo(IG foT$)t$pAow8)$AoHAOHfH~fI~AW8)$AowXHI)$fo|$ HDŽ$ DŽ$()$A(AGXL9OMfI}IL9wIG@Ao_(Aog8MoHXIG )\$0)d$@foH$fo$IWh\$Pfo$fo$)$fo$fo\$ H$fo$AWpAWAg(foT$0fod$$L$H$AO8AwHAX)$)$)$I9vfH;HI9wfol$0foL$@IGhAGp)$)$Mt"HD$PH8H4$LHHE<-UHHDAGpIGhvfH|$pHt$hHD$pHD$x)$)$)$)$IG(HDŽ$DŽ$I;G0 HhHIG(HHH$@HH$0HH$@H$8$8Ht@uHHHHDLD$8H$0AĄtZ$8JHH9tHH9sH H0HHHrHHH9r勌$8H$0pAH9tH|$hLl$pL$ I_8IG(HSH9HIG(Ll$pAoMtLH$H$HhH9vH;HH9wHD$xLH4H$dH+%(HĘD[]A\A]A^A_ú1H5H=Afo$HD$xfo$fo$fo$H$)|$ )\$>Ll$pAfLHDŽ$HDŽ$)$)$)$)$Mfo$fo\$pfo$fo$fo$)|$fo$fo$)$L$L$)$fo$fo$)$)$)$)$AogXAoW8AGXH$Ao_HAOH)$fo$$AowfH~Ao()$fol$HHD$@H$ fH~Ջ$HMoD$P$(Mw )t$p)$)$Ao(Ag8H9II}IL9wAow8AoO(IG@MoMw )L$ fo)t$0HXfo$IWhl$Xfo$fo$)$fo$fol$H$fo$AWpAwfo4$L$$L$A(AW8A_HAgX)$)$)$H9sDH;HH9wHD$@fo|$ foT$0IGhD$P)$AGp)$MLHD$XH8J4L%Ht$hII0IW@HBHfHnfHnHflAG(fHnflAG8fo$Lt$xfo$fo$)L$fo$)4$_Ao $HH) $HD$8dH+%(ujH@HL[]A\fDUH\$ D$ HAH\$HD$ Q1LL$ H|$ XZH9tHD$ HpqHAWL~AVAUIATIUSHF %HlL9u%4@LLI]IL9M7MtAFI]Pʃv'w+HHs?IBIEAF>wLHzI}hoLIEhIV0HHH8HGHH@&wVGHT$&fHW Gt%HH:G&vHHW Gut|@AEpf.AD$@t0AD$ %ItHtHL[]A\A]A^A_fDH[]A\A]A^A_@f.HBzuHT$HHIAVAULnATIUHSVHLtM9t:L HI9tH3Hu1[]A\A]A^DAT$HI\L4I9u0HI9tH3HufDAT$HHHLL4L9u3fDHI9tH3HukAT$HHRI\L$L9u$HI9tH3Hu#AVAULnATIUHSVHLtM9t:L HI9tH3Hu1[]A\A]A^DAT$HI\L4I9u0HI9tH3HufDAT$HHHLL4L9u3fDHI9tH3HukAT$HHRI\L$L9u$HI9tH3Hu#AWAVAULn(ATIUHSHFLt(M9t7L 
fHI9t'H3HuE1HD[]A\A]A^A_It$HAƄtAD$HI\L<L9u4HI9tH3HufDAD$HHI\L$L9uo@HI9^H3HuGAWAVAULn@ATIUHSHFHItuE1HD[]A\A]A^A_f.AD$HHIttAD$Mt@M9tL@H3HtHI9uIt$HtIt$(HADŽpAD$HI\L4L9t-@H3H>HI9uAD$HHHLL4L9t-@H3HHI9uAD$HH @I\L4L9t.DH3HHI9uAD$HHI\L$L9uHI9H3HupAVAUATUHSH^H DfdH%(HD$1JLdL9tWII IM9t7I6HuHT$dH+%(H []A\A]A^EeJHD$HIH$L$1I9t/u`H0HtHT$u'H$HH$I9uHu̸oHt'LH$HT$@LfDLH$HT$kAVAUATIUHSHv(t9I\$8AAD$0L4I9u9fHI9t'H3HuE1[D]A\A]A^fI\$HAD$@L4I9u#HI9tH3HufDI\$XAD$PL4L9u#HI9tH3Hu{I\$hAD$`L4I9u HL9tH3HuCI$A$L$I9u(DHL9H3HuAWAVAUATUSHhdH%(HD$X1Ht&HF0HHuBHHHt<1HT$XdH+%(WHh[]A\A]A^A_HtfDH3Lt$HLD$H|$t0D$ IHL$L9t@A?:I(M9uDD$ tOHIL, I(M9t-A<$wI|$ID$H9tID$I(HpM9uӋD$ H|$H4HHEHmH9^HD$1HD$HD$ H9t5ucH0HHT$u&HD$HHD$H9uHuƸ@Ht'LHD$HT$LfDLHD$HT$gM9HD$@Ll$0H$DIwIWLLH$HLD$HD$0H|$A7LH|$0H;<$tHD$@HpIG(L9tf.8IvH(I9uH|$=DI9wD11AUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ 
Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ 
Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ 
Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUHSHHhdH%(HD$XHFHt HptvHH|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/urH0Ht%HT$u9H$HH$H9uI9u̸HT$XdH+%(uYHh[]A\A]Ht'LH$HT$@LfDLH$HT$YAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ 
Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ 
Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUSHHhdH%(HD$X1H|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAVAUATIUHSH`Hv0dH%(HD$X1t2DuA1Eu QA9tHHE8ULH4uE1HD$XdH+%(cH`D[]A\A]A^Du1LIu-DHI9tHE8LH4ufDu1LIu-DHL9tHE@LH4uWfDHH|$ HHT$0foD$ HD$ H\$8HT$Lt$@)$HT$H9t3u%wIwLHt$0Ht9IL9|$(t)A%wFuIwHHT$8dH+%(HH[]A\A]A^A_uAGMoH@IDHD$ I9uCw%wIuLHt$0HtIL9l$ 9AE%wuIuHfDuAEI]H@HHD$H9uH%w!HsLHt$0HHH9\$]%wuHsH@uËCHSH@HHD$H9tIDD%w!IvLHt$0H]IL9t$aA%ttwuIvHuAFINH@LL9tfDHHLD$H $H $LD$HI9umDIvHWHsHIuHIwH7HD$8dH+%(u?HvHH[]A\A]A^A_HvHD$8dH+%(uHH[]A\A]A^A_AUATUSH(dH%(HD$1HH.}5EHIHcH>Hu/Hu HD$dH+%(rH(L[]A\A]fDHuHHH;(t ]HE0HŁHH9t"fHuLtHH9uHT$dH+%(H([]A\A]@Hu(tkHuHD$dH+%(@HD$dH+%(HuH(L[]A\A]fDHu(H1^fHD$dH+%(uBH(H[]A\A]fDHu Ht tHu(HD$dH+%(fHu0Htu1Hu _UHE0HRHH9HfHH9HLu1DHu(Ht Hu0HD$dH+%(GDHD$dH+%(.Hu H([]A\A]fHHHD$dH+%(a@Hu(u1Hu_H}(UHE0HRHH9HHH9HLu1rDHDHw GHH>G&vHDHDHw GHH>G&vHDIHLLLf.Hu _HD$dH+%(Hu(fEHu( DHu UHE(HRHH9_Hf.HH9CHLu12DHu0Ht Hu8H}Lm1fDHu wHu0HDHu OEHu8HHuAVAUATIUHSHvtAAAD$I\$(HL4L9u>HI9tH3HuE1[D]A\A]A^ÐAD$HAT$HD(ILAD$HfHу<tjЃwAT$At$HHL(IL H<uHH4HZ[H]A\A]A^HL4L9u/@HI9tH3Hu AD$fLH+AUATIUSHHHnHLmIt[IuHt Lw4IEHH1HL@Hw0HEHH1HLtd@Hw0HCHHt01Ht(HL[]A\A]fDH[]A\A]HsHfD1@fHuHsIuHAUATUHSHLd$Ht$LHtXHXHt[LcIudHwAHCHH1HHH[]A\A]ÐLHusH[]A\A]Ml$ILyID$HH1HHH1HXHtHsHt HtHYHCHHt 1HHsHIuHtHzL!IEHHt21Ht*H1It$HfIuHAWAVAUATUHSHHHdH%(HD$8%j #HFHH8H~HfHnfl)$iHLt$ D$ L}Lt$Ld$HD$H<$IHpXHLLMjAH|$ XZL9tHD$ 
HpHsLHt$HD$8dH+%(HHH[]A\A]A^A_@u#FLvLd$H@II9uof.HT$8dH+%(HH[]A\A]A^A_%wIvLHt$Ht|IL9tA%tnwuIvHfuAFM~H@M,M9tLHtIM9tLHu1&IvHgHFHt^H0HPHD$8dH+%(HH[]A\A]A^A_HD$8dH+%(|HvHH[]HA\A]A^A_HvHD$8dH+%(uLHH[]A\A]A^A_fDeH4$EHHT$De"Ld$H<$H@(uHtHLt$ D$ L}Lt$Ld$HD$H<$IHBVisitTemplateSpecializationTypeLoc AWAVIAUATUHSHHHdH%(HD$8HF Ld$LHD$HfAnLl$ HMLl$HH}AD$(fpHD$ fH~uH|$ XZL9tHD$ HpHs HAńtYCtUM~H(Mtf.IHM9t,IoLHHSHD$(HT$ )D$uE1HD$8dH+%(~HHD[]A\A]A^A_ÐL-#H5LH5LHt$LH\$HT$Ht$LH|$HD$ H9AWAVAUATUSHXdH%(HD$H1H5HIHIHH;@tLH1H3JD)HHHT$HdH+%(HHXL[]A\A]A^A_HHAAu6GHHcH>HH4$HLl$HfnfpfH~H<$A|$M,$Md$HH\$0H\$ HHD$8ALHD$0ATLL$0H|$0^AXH9tHD$0HpAHD$HdH+%(HXD[]A\A]A^A_IuLtIuLt@Hu HD$HdH+%(uJHXL[]A\A]A^A_H~ 1Hu JD)HHHHD$HdH+%(iDHD$HdH+%(uHvHXL[]A\A]A^A_IEH0HPHD$HdH+%(DHD$HdH+%(uHXLL[]A\A]A^A_Hv(LHu0HD$HdH+%(LfDHv(LHuHD$HdH+%(HH4$HLl$HfnfpfH~FHv(rILLLAIUHtHv HtLEI] H0M|(Ll$ fDHoELLHUHD$8HT$0)D$ HHI9uxHv(HL\IUHv HD$HdH+%(HXL[]A\A]A^A_HHHD$HdH+%(HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(mHXLL[]A\A]A^A_M4$I\$IH4$LLl$LfnfnH$fbH@ fI~H@P#HHPHl$ LHH$HLLIAHp SH|$0HD$@ZYH9HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(oHXLL[]A\A]A^A_HD$HdH+%(BHXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_Hv0HtL;Hu(HD$HdH+%(HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(}HXLL[]A\A]A^A_HD$HdH+%(PHXLL[]A\A]A^A_HD$HdH+%(#HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(oHXLL[]A\A]A^A_HD$HdH+%(BHXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HD$HdH+%(aHXLL[]A\A]A^A_HD$HdH+%(4HXLL[]A\A]A^A_HD$HdH+%(HXLL[]A\A]A^A_HvHHAWIAVAUATUHSHH(F<i1fFDcAAAH{1HsLID,HHHHD$DcA{DAE1@FMoǃHʃv@'w*HHs?HBIGF>w"HHt$Ht$Ht IhLMoIE9JtHgCD9vJt(LuD$D$H([]A\A]A^A_A<HHt$IGhHN0HHH8HGHH@&w\GHL$&]DHO Gt/HH9G&vHt$Ht$HHO GuAGpCSHHD(HLCHfH,у<tpЃSsHHL(HL H<uHH4HH(L[]A\A]A^A_HL$L9ua@HI9t_HuLu:f.HAHt$Ht$HL$HHCf1ATIUHSH~ H1It$ HHDHHHu [1]A\HsH[]A\ATIUHSH~ H1It$ HHDHHHu [1]A\HsH[]A\ATIUHSH~ H1It$ HHDHHHu [1]A\HsH[]A\ATIUHSH~ H1It$ HHDHHHu [1]A\HsH[]A\ATIUHSH~ H1Hs H[]JD! 
A\HHHATIUHSH~ H1Hs H[]JD!A\HHHATIUHSH~ H1Hu LH []HA\HHATIUHSH~ H1Hs H[]JD!A\HHHAVAUATUSHPH4$HT$dH%(HD$H1HHH~IHfHnfHnfl)D$H|$H\$0D$0LmH\$ Lt$ HD$(H|$IHpXHLLMjAH|$0XZH9tHD$0HpL$LvOHT$HdH+%(HP[]A\A]A^ÐDeHt$EHHT$DetHHHH|$H@(uHtH|$H\$0D$0LmH\$ Lt$ HD$(H|$IHAUATUSHHH4$HT$dH%(HD$81HHHH+HH\$ D$ H\$Ll$HD$H<$IHpXHLHMjAH|$ XZH9tHD$ HpHT$8dH+%(HH[]A\A]fH<$H@(t/DcH4$CHHT$DcHtH+HH\$ D$ H\$Ll$HD$H<$IHAWAVAUATIUHSHHoF@H~@dH%(HD$81)$HtO]H4$EHHT$]E1HD$8dH+%(HHD[]A\A]A^A_fHH\$ D$ LuH\$L|$HD$H<$IHpXHLLMjAH|$ XZH9tHD$ HpAD$Ml$hMthM9t&LH3H2HI9uIt$HIt$(HAƄAD$HI\L<I9t-@H3HHI9uAD$HHHLL<I9t-@H3HHI9uAD$HH @I\L<I9t.DH3HJHI9uAD$HHI\L<L9t!DH3H HI9uA|$0AD$HI\LfD]Ht$ EHHT$(]tLl$ fDLl$ H\$PD$PL}LH\$@Lt$@HD$HH|$ HD$HMLjHHT$AvAUIATIUHSHF<uY1fF]H}1HuHJD+L[H]A\HA]HH@<tATIUHSH~H1HsH[]JD!A\HHHATIUHSHBHHtFH0HPtGI|$ 1It$ HHD[]HA\HHHv(Hu[1]A\ATIUHSH~ H1Hs H[]JD!A\HHHAUATIUHSHH~FHfHH91tKFHHPL,1HsHKD%HHHHt?CftCI\$Md @HL9t'HHH0HPuH1[]A\A]@H[]A\A]ATIUHSH~ H1Hs H[]JD!A\HHHATIUHSH~ H1Hs H[]JD!A\HHHATIUHSH~ H1Hs H[]JD!A\HHHATIUHSH~ H1Hs H[]JD!A\HHHATIUHSH~ H1Hs H[]JD!A\HHHATIUHSH~ H1Hs H[]JD!A\HHHAWAVAUATIUHSHHdH%(HD$8HFxt,1H9XH@LH4HExu]Ju(HT$8dH+%(0HH[]A\A]A^A_DLmhIE`hHHGLuhIIF`HGH@MlHo@I9t$HELH0HPQHL9u܉AfofH~)$HuHH\$ D$ M4$H\$L|$HD$H<$IHpXHLLMjAH|$ XZH9HD$ Hp@A\$H4$AD$LHT$A\$fSfHLHHIE`HHtkHaH8W 9PRPHL>fDH<$H@(SHGHH\$ D$ M4$H\$L|$HD$H<$IHHLHHIF`HHt*t%HtH8W 9PtPHLDHLuhGH@MlIF`tTHHt(t#HtH8W 9PtPHLHu=HlVHLHHIF`AWIAVAUATUSHhHN8Ht$dH%(HD$X1uCDHHH1HQHD$XdH+%(HhL[]A\A]A^A_D$ fHʋD$ H;BD$ HRL$M|AD$ Mt$%IlI9u*1fLLI_IL9 M.MtAEI_ǃHʃv@'w+HHs?IBIGAE>wLHyIhnLIGhIM0HHH8HGHH@&GHL$&@HO Gt%HH9G&vHHO Guu\HAuRPAD$@tAD$ %ItHtLHD$HH8D$ HAGpfHL$HHyEDofH~)D$ H%H|$ H\$@D$@I/H\$0Ll$0HD$8H|$ IHpXHLHMjAH|$@XZH9tHD$@HpHD$HH8HHH HI HHD$Hp0HD$XdH+%(HhL[]A\A]A^A_A_Ht$ AGLHT$(A_yHD$XdH+%(ujHh1[]A\A]A^A_H|$ H@(uHtH|$ H\$@D$@I/H\$0Ll$0HD$8H|$ IHHAUHATIUHSHF 
HcH>HF0Hu>H[]A\A]DHv(HH[]A\A]f.HF(HtH0HPHH[]A\A]fDHv(tCIt$0H^(F$L$L9uzHI9fH3Hu@1H[]A\A]H^(F$L$L9u(DHI9H3Hu1H^(F$L$L9uHI9H3Hu1wH[]A\A]H^(F$L$L9ufHI9H3Hu1'H^(F$L$L9ucHI9NH3Hu1H^(F$L$L9u#HI9H3Hu1H^(F$L$L9uHI9H3Hu1gHv(TI\$8AD$0L$L9u@HI9~H3Hu1Hv(I\$8AD$0L$L9u?@HI9.H3Hu1Hv(I\$@AD$8L,L9u@HI9H3Hu1wHv(dI\$8AD$0L$L9u@HI9H3Hu1'H^(F$L$L9ucHI9NH3Hu1H^8F0L,L9uHI9H3Hu1H^8F0L,L9uHI9tH3Hu1kA|$#Hv(HHVH6H^(F$L$L9uHI9nH3Hu1DI\$HAD$@L$L9u;HI9.H3Hu1DI\$HAD$@L$L9uHI9H3Hu1I\$PAD$HL$L9uDHI9H3Hu1GAUIATUHHSHtLHHt HtLAEtULPHL$AEtLLH HI9tH3HuH[]A\A]DH[]A\A]1AUIATUHHSHtLHHt HtLAEtULPHL$AEtLLH HI9tH3HuH[]A\A]DH[]A\A]1AUIATUHHSHtLHHt HtLAEtULPHL$AEtLLH HI9tH3HuH[]A\A]DH[]A\A]1AUIATUHHSHtLHHt HtLAEtULPHL$AEtLLH HI9tH3HuH[]A\A]DH[]A\A]1AUIATUHHSHtLHHt HtLAEtULPHL$AEtLLH HI9tH3HuH[]A\A]DH[]A\A]1AVIAUATUHSH~htwIv0HAńtWAFtPLPHL$AFLHI9u!fDHI9tH3HuE1[D]A\A]A^ÐLf`ID$Xu tHLHHID$XHHOFHwLHxIhmLIGhIN0HHH8HGHH@&w\GHL$&7HO Gt%HH9G&vHHO GuAGpfI$H HPHH9I|$pt0LP@LHu1H[]A\A]A^A_f.AD$tPLPHH,AD$LHH9t"f.H3LtHH9uH[]A\A]A^A_@HA"HL$HHL҄;I$1\AWIAVAUATUHSHhL'VdH%(HD$X1H|$ I$T$ HD$(H|$D$HT$Lu6D$HD$XdH+%(D$Hh[]A\A]A^A_fM'HU0H\$0HLHHHE1fnMLIHAHfpjfH~H|$@HD$PZYH9tHD$@HpHLD$9EwLHyIhnLIGhIM0HHH8HGHH@&weGHL$&w,@HO Gt0HH9G&vHِHL$HHuuHAAGpf.Ht HHHtLEHPHL$EtqHHI9ulDHI9ZH3LuAHLH31AWAVAUATUSHH(HnXHt$HWHWII)Ld$3HI9LHWIHWII)LH9LI I9M}AGMuf.IHtG u_Lc<'wHHHs1?IBHCAW>wLHt H{hLHLcL9M>IAGPvЍzۃ]zLcw<'_E11H9tCHLLIM}AGIfIM9tCLHu1HT$dH+%(H([]A\A]A^A_IwHtIM9cfDIwHufDHHH@HIHpHuALpIFHHt1HD@IvH3HuIuHHqH|1AVAUATIUHSHPL/VdH%(HD$H1H|$IT$HD$H$HT$Lu,E1HD$HdH+%(mHPD[]A\A]A^DM,$HU0H\$ HLHHHE1fnMLIHAHfpjfH~H|$0HD$@ZYH9tHD$0HpHLAńCEH$HH$H9uI9u̸HT$XdH+%(u^Hh[]A\A]Ht'LH$HT$@L|LH$HT$TAVAUATIUHSF@Hv EU@@%@H\ HL,HD HL9uvfDHHI9tcHEL3HtH0HPLtLLu[1]A\A]A^fF%HD H0HPG@[]A\A]A^AUATUHSHHhdH%(HD$XHFtHH0HPtvHH|$ IHT$0foD$ HD$ Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WAUATUHSHHhdH%(HD$XHFtHH0HPtvHH|$ IHT$0foD$ HD$ 
Hl$8HT$Ld$@)$HT$H9t/utH0Ht%HT$u;H$HH$H9uI9u̸HT$XdH+%(u[Hh[]A\A]DHt'LH$HT$@LfDLH$HT$WATIUHSH@HvdH%(HD$81Ht#EH?IL$fnfpfH~…uWEtNAo $HH) $HE(LH0HPuzHD$8dH+%(H@1[]A\UH\$ D$ HAH\$HD$ Q1LL$ H|$ XZH9pHD$ Hp]HD$8dH+%(uH@HL[]A\AWAVAUATUHSHHoNH~dH%(H$1)L$HQfo\$H|$DcC)\$ HLGHD$(IfInfHnLfl)D$@LL$pH|$@L;HD$xL$LL$Lt$pƄ$H|$@IHpXHLLAjLL$H$XZL9tH$HpH|$ 9DcHH|$@Ll$ HT$PfoT$@HD$@Hl$XHT$0Ld$`)T$ HT$(H9t9H0Ht+HT$(OHD$ HHD$ H9uL9u¸H$dH+%(/HĨ[]A\A]A^A_fH|$L3L$HD$xLl$pL|$pƄ$H|$IHpXHLMjALH$Y^L9H$HpH|$H@(HH|$L3L$HD$xLl$pL|$pƄ$H|$IHJfHt'LHD$ HT$(fLgLHD$ HT$(:@DkHt$@CHHT$HDkDcNH|$ HHDc(f.H|$@H@(uHtLL$pH|$@L;HD$xL$LL$Lt$pƄ$H|$@IHAWAVAUATUHSHHoN H~ dH%(H$1)L$Hfo\$H|$DcC)\$ HLGHD$(IfInfHnLfl)D$@oLL$pH|$@L;HD$xL$LL$Lt$pƄ$H|$@IHpXHLLAjLL$H$XZL9tH$HpH|$ DcHE0HtH0HPH5HE@Ht$u HtH0HPH HH|$@Ll$ HT$PfoT$@HD$@Hl$XHT$0Ld$`)T$ HT$(H9t9H0Ht+HT$(RHD$ HHD$ H9uI9u¸H$dH+%(BHĨ[]A\A]A^A_@H|$L3L$HD$xLl$pL|$pƄ$H|$IHpXHLMjALH$Y^L9H$HpHE0H{fH|$H@(RHFH|$L3L$HD$xLl$pL|$pƄ$H|$IHBfHt'LHD$ HT$(cLdLHD$ HT$(7@1tfDkHt$@CHHT$HDkZDc;H|$ HHDcJf.H|$@H@(uHtLL$pH|$@L;HD$xL$LL$Lt$pƄ$H|$@IH|AWAVAUATIUHSHHxH~ dH%(HD$h1Ff<)HID$fHnfHnfl)D$LL$@H|$HD$HL}Lt$PL $Lt$@D$PH|$IHpXHALLjLL$H|$PAXAYL9tHD$PHpSfH{(I1Hs(JD!HHHT$hdH+%(HxHH[]A\A]A^A_ffoT$H|$DmE)T$ HLGHD$(IfInfHnLfl)D$0VLULL$@H|$0HD$HL|$PL $LT$L|$@D$PH|$0IHLAHpXjH|$LL$H|$PXZL9tHD$PHpH|$ DmDH1H|$H@(HLL$@H|$HD$HL}Lt$PL $Lt$@D$PH|$IHf.HC HHfDfDDuHt$0EHHT$8DuDmHD$hdH+%(Hx1[]A\A]A^A_H|$ HHDmcDH|$0H@(jH^LL$@H|$0HD$HL}Lt$PLL$Lt$@D$PH|$0H$HLjLL$HAHT$H|$PY^L9AWAVAUATIUHSHxoN8H~8dH%(HD$h1)L$H bfoT$H|$]E)T$ HLGHD$(IfInfHnLfl)D$0aLL$@H|$0HD$HL}Lt$PLL$Lt$@D$PH|$0IHpXHLLAjLL$H|$PXZL9tHD$PHpH|$ ]AD$Ml$`Mt`M9DLHI9+H3HuE1HD$hdH+%(HxD[]A\A]A^A_H|$H\$PD$PLuH\$@L|$@HD$HH|$IHpXHLMjALH|$PY^H9)HD$PHpf.H|$H@(HH|$H\$PD$PLuH\$@L|$@HD$HH|$IHRfDIt$HIt$(HAƄAD$HI\L<I9uHI9tH3HuwDmHt$0EHHT$8DmAƄ]EfDH|$ HH]AƄAD$HHHLL<I9uHI9t~H3HuH|$0H@(:H.LL$@H|$0HD$HL}Lt$PLL$Lt$@D$PH|$0IHAD$HH @I\L<L9u *HI9tH3HuAD$HHI\L$L9uHI9H3HuAWAVAUATIUHSHxoN8H~8dH%(HD$h1)L$H bfoT$H|$]E)T$ 
HLGHD$(IfInfHnLfl)D$0aLL$@H|$0HD$HL}Lt$PLL$Lt$@D$PH|$0IHpXHLLAjLL$H|$PXZL9tHD$PHpH|$ ]AD$Ml$`Mt`M9DLHI9+H3HuE1HD$hdH+%(WHxD[]A\A]A^A_H|$H\$PD$PLuH\$@L|$@HD$HH|$IHpXHLMjALH|$PY^H9)HD$PHpf.H|$H@(HH|$H\$PD$PLuH\$@L|$@HD$HH|$IHRfDIt$HIt$(HAƄAT$HI\L<I9uHI9tH3HuwDmHt$0EHHT$8DmAƄ]EfDH|$ HH]AƄAT$HHHLL<I9uHI9t~H3HuH|$0H@(:H.LL$@H|$0HD$HL}Lt$PLL$Lt$@D$PH|$0IHAT$HH RI\L<L9u *HI9tH3HuAT$HIIMII9u +IL9tI7HuAT$HHMdII9uIL9I4$HuAVAUATUSH dH%(HD$1Ht+FH^wHIHcH>fHT$dH+%( H []A\A]A^f.HvHD$dH+%(HuH L[]A\A]A^HvtSHD$dH+%(Hu ‹VHFHH9UH HH9CHuLu1.fHvtHD$dH+%("Hu(OfDHD$dH+%(Hv,FH^H4tEHlH9uDHH9H3Lu1HD$dH+%(H []A\A]A^HD$dH+%(oH []A\A]A^FH(H\(H9u%DHH9HuLu1HD$dH+%( H []A\A]A^FLnLtM9LDHI9H3Lu1FHH\H9uHH9kHuLu1YDnEG1I!Ht$LA9HLH4$Lu1FHhH\hH9uDHH9HuLu1FHŐHƐH9uHH9HuLu1HD$dH+%(H []A\A]A^FHtEH\HH9u,@HH9HuLu1 FHPH\PH9ufDHH9HuLu1HvEH\(H(H9uDHH9HuLu1yHD$dH+%(H []A\A]A^FHPH\PH9u9fHH9#HuLu1FHH\H9ufDHH9HuLu1FH(H\(H9ufDHH9HuLu1HvKHD$dH+%(Hu0HD$dH+%(mH []A\A]A^FHPH\PH9u#HH9HuLu1FH H\ H9ufDHH9HuLu1FHH\H9ufDHH9HuLu1HD$dH+%(H []A\A]A^HD$dH+%(fH []A\A]A^HvHD$dH+%(0Hu@]FHPH\PH9ufDHH9HuLu1FHH\H9ufDHH9HuLu1FHhH\hH9unfDHH9[HuLu1IHD$dH+%(uWH []A\A]A^EHI\H,H9u fHH9H3Lu1AUIATUHSHHF(HXLdI9u.fDHI9tH3Hu1H[]A\A]LtLHHt HtAEtALPHL$AEt8LHH3HtHI9uH[]A\A]1AUIATUHSHH~(I}(WL$I9t6HfHI9tH3HuH[]A\A]DIE(HXLdI9LtLHHt HtlAEtyLPHL$AEtpLHHI9tGH3HucHI9{H3HuH1[]A\A]H[]A\A]1HtKATUHSH^L$L9u#HI9tH3Hu[]A\[]A\fDAVAUIATUHSLf(MtiAD$H_Pʃ?IBHEAD$>wLHt H}hLHH]IE0HtsPtlHXHLd0f.H I9tGHHu1[]A\A]A^D'tHHD[fDLtLHHt HtAEtMLPHL$AELHL9t @H3HWHI9u[]A\A]A^fDLHEhMt$0LHH8HGHH@&w>G&w?fLw Gt@II>G&vHfEphHHuAuIAFuո1AVAUATUHSLn0HMtgAELgPʃ?IBHCAE>wLHt H{hLHLcHEHHtHHHu8HAĄu/E1[D]A\A]A^À'wHH_ufHtHHHt HtEtHPHL,EHH(L9uuHI9gHuHuOfDtmHEHHHu H5H@HHHFH1HtHHHHuHtDHvfL%LtHLHHRLHChMu0LHH8HGHH@&w7G&wJLw GtJII>G&vHِCpH.HHuAuIAFuø1ZAWAVIAUATUHSHHH3H LhLdM9u(LLI^IM9M}MtAGI^ƃHʃv@'w+HHs?IBIFAG>wLHxI~hmLIFhIO0HHH8HGHH@&GHL$&HO GHH9G&vHDHPHXHLd0 @H I9tgHLuE1HD[]A\A]A^A_HL$HHbuHAAFpZfHLtHu@LAĄtEtHPHL,EtCHHL9uVHI9FH3Lu/r1AWAVIAUATUHSHhL'VdH%(HD$X1H|$ I$T$ 
HD$(H|$D$HT$Lu.E1HD$XdH+%(HhD[]A\A]A^A_DM&HU0H\$0HLHHHE1fnMLIHAHfpjfH~H|$@HD$PZYH9tHD$@HpHH^H LhLdM9u#BfLLI^IM9$M}MtAGI^ƃHʃv@'w+HHs?IBIFAG>wLHxI~hmLIFhIO0HHH8HGHH@&wdGHL$&w+HO Gt0HH9G&vHِHL$HHuuHA AFpf.HPt.HXHLd0fHLH L9uHLAĄEwHHt I~hHLI^AD$IT$(HH,H9nI$LLI^IL9<M}MtAGI^ƃHʃv@'w+HHs?IBIFAG>wLHxI~hmLIFhIO0HHH8HGHH@&w\GHL$&HO Gt%HH9G&vHHO Gu8AFpf'pHHBWfDAD$HI\(AD$L,HD$pHD$L9u@DuT}tHELHH0HPoHI9bH+Ev}uHuL}(tHuLH"HEHtuH}H8oH)D$@HMH|$@L$HD$xL|$pL\$Ƅ$H|$@HD$HpXHAjH|$(LL$HT$ H$XZL9tH$HpHE@HHLxHHl0I L9LLu@1H$dH+%(HĨ[]A\A]A^A_DHAHL$HHZHIFhLm0LHH8HGHH@&wCG&Lo GII}G&vHԸAFpfDLH|$@Ld$ HT$PfoL$@HD$@H\$XHT$0Hl$`)L$ HT$(H9t5ucH0LHT$(u&HD$ HHD$ H9uH9uƸ~@Ht'LHD$ HT$(LfDLHD$ HT$(g@E~Ht$@AFLHT$HE~H|$@H@(uHtMH|$@L$HD$xL|$pL\$Ƅ$H|$@HD$HHHIAkIAE\WAWIAVAUATIUSH(HF0Ht/HHPHtH0HPfDHD$AT$$H HL$ID$(HT$Hp LpHlL9u$CLLI_II9$M.MtAEI_AxAvA'w+HLs?IBIGAE>wLHwIhlLIGhME0LHH8HGHH@&wcGLD$&fDLG Gt%II8G&vHLG GuAuIA@J@AGpf.HD$HD$H9D$}Lt LHHtLAD$tJLPHH,AD$LHH3LHH9ufHH@H_ƃPʃv@'w*HHs?HBIGE>wHHtIhtVHLI_B1H([]A\A]A^A_LD$HHi5DHIGhLm0LHH8HGHH@&w7G&w9Lo Gt9II}G&vHظAGp4HHuAuIAEuԸ1}AVAUIATUHSHF(HHHJHH0HPTIE(HB:HHHH82GLg0MrAD$H]Pʃv'w,HHs?IBHEAD$> LHH}hLHEhMt$0LHH8HGHH@&G&9Lw GII>G&vH@HItx@H_Pʃv'w,HHs?IBHEAD$>wLHt H}hLHH]LtLHHt HtvAEtWLPHL$AEFLH DHI9tH3Hu[]A\A]A^[]A\A]A^f.[1]A\A]A^HGHHxBHH,f.LHEhMt$0LHH8HGHH@&w>G&w?fLw Gt@II>G&vHfEpHHuAuIAFuոHHt1H% AWAVAUATUHSHHXdH%(H$HHF8HH@H@ HPL`HLl0 I M9tLHu1H$HdH+%( HX[]A\A]A^A_DE1DHD9`BH@DHAH4HE87DHU8HH HtS s $2G^p+ G8 Ldy )C=4\Xg W]?\q . 
E0!& v v, i'A>M1D 9O i?(GCC: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0GNUzRx 0 DXl   4H\p     $8L ` t***( <Pdx/Ma/Ma/Ma%LT 8pEG@] AA \:H l A xAEZ A `<QFBB A(G0) (A BBBF %LT$ZEHG {EA0DXlLAFBB A(A0 (A BBBL U(A BBB2E^ M A$tAAG hAA$,iFHG DIBTZL@ L AtL H ABkVHFEA A(D0 (D DBBK d(A ABBHABBB B(A0A8D`(8A0A(B BBB$D_EAD RAA@lBDD r ABA T ABI v ABA H{FIA A(G0 (A ABBI t(A ABB8FID A(D08 (I ABBJ H87BBB B(A0A8D`8A0A(B BBBHQBBB B(A0A8D`88A0A(B BBBLBBB B(A0A8Dp 8A0A(B BBBH 8 BFA A(D0x (A ABBD \ p l FBB B(D0D8G 8A0A(B BBBH ZDKAmMYA` FEB A(A0 (A BBBD ^ (D BEBE   (F BBBD 4X FDA x GBL YAB rEb I A N( FDA AB4 uFDA G GBE QABL( FBB E(D0V (A BBBH X (A BBBI x !H FIE I(D0D8GP 8A0A(B BBBH H FII E(D0D8GPr 8A0A(B BBBH H$ bFFE B(D0A8D@ 8A0A(B BBBF Hp KFBE A(A0 (D BEBK k(A BBB@ FDD n ABA T ABI v ABA ( FJO DBB L, ^FBB E(A0D8G 8F0A(B BBBK | |LT H i O H ;FBB B(D0A8DP 8A0A(B BBBI NEF ^ AF `FEF B(A0A8Dp 8F0A(B BBBD  8D0A(B BBBA LtDFBB B(A0D8DG 8A0A(B BBBJ HIPBB B(A0A8D`e 8A0A(B BBBE LFBA D(D0W (A ABBB t (G ABBH H`FBB E(A0A8DP 8A0A(B BBBB HFBE B(G0D8N`  8A0A(B BBBD H*FBB B(D0D8H`9 8D0A(B BBBF `DOFB B(A0D8G@H 8A0A(B BBBI PH@H\PBB B(A0A8D`n 8A0A(B BBBD eLBBE B(A0A8G) 8D0A(B BBBF HXPBB E(D0A8DP 8D0A(B BBBC HFEB B(A0G8J  8A0A(B BBBG <3FEF A(A0 (A BBBF \0FBB B(D0D8DoUAk 8D0A(B BBBI HFOB B(A0A8DP4 8A0A(B BBBC dFBB B(A0C8J 8A0A(B BBBC y 8A0A(B BBBE D!XLlBBB B(A0A8G 8A0A(B BBBA AFHB B(A0D8GLJB OdB 8A0A(B BBBA E}B'GbAADRBREFv|L^LMHGFOB B(A0A8DP 8A0A(B BBBC LFBE B(D0A8Gs 8C0A(B BBBF LPdFBB B(H0H8N 8K0H(B BBBE |BBB B(A0A8DMTAl 8A0A(B BBBF [ 8A0A(B BBBE X 8A0A(B BBBE s 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE i 8G0A(B BBBE Y 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE  8A0A(B BBBE X 8A0A(B BBBE U 8G0A(B BBBE \ 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE n 8G0A(B BBBE  8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE X 8A0A(B BBBE UH^At 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE m 8G0A(B BBBE X 8A0A(B BBBE U 8G0A(B BBBE X 8A0A(B 
BBBE X 8A0A(B BBBE e 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE  8A0A(B BBBE  8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE U 8G0A(B BBBE Q 8G0A(B BBBE  8D0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE  8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE e 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE  8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE X 8A0A(B BBBE i 8G0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBE X 8A0A(B BBBJ  8G0A(B BBBE \ 8D0A(B BBBE LeOBD D(D0d (F ABBD P0<BBH A(A0 (F BBBK D FFB D(E0G 0A(A BBBK dh?BBE B(D0D8D` 8A0A(B BBBG Z 8C0A(B BBBJ @BBB A(A0D 0D(A BBBA \ 0A(A BBBE \ 0A(A BBBE  0A(A BBBE X 0A(A BBBE  0A(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE e 0G(A BBBE n 0G(A BBBE e 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE i 0G(A BBBE X 0A(A BBBE i 0G(A BBBE  0A(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE  0G(A BBBE a 0D(A BBBE i 0G(A BBBE X 0A(A BBBE i 0G(A BBBE RZAk 0G(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE  0G(A BBBE X 0A(A BBBE t 0G(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE  0A(A BBBE p 0G(A BBBE p 0G(A BBBE e 0G(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE t 0G(A BBBE X 0A(A BBBE X 0A(A BBBE e 0G(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE \ 0A(A BBBE  0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE e 0G(A BBBE \ 0A(A BBBE  0A(A BBBE X 0A(A BBBE X 0A(A BBBE  0A(A BBBE  0A(A BBBE X 0A(A BBBE X 0A(A BBBE l 0G(A BBBE X 0A(A BBBE p 0G(A BBBE X 0A(A BBBE i 0G(A BBBE  0A(A BBBE p 0G(A BBBE p 0G(A BBBE p 0G(A BBBE i 0G(A BBBE X 0A(A BBBE { 0A(A 
BBBE i 0G(A BBBE X 0A(A BBBE p 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE p 0G(A BBBE  0G(A BBBE i 0G(A BBBE X 0A(A BBBE i 0G(A BBBE p 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE  0G(A BBBE i 0G(A BBBE X 0A(A BBBE  0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE  0G(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE t 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE p 0G(A BBBE  0G(A BBBE i 0G(A BBBE n 0A(A BBBE h 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE T 0A(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE e 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE p 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBE i 0G(A BBBE i 0G(A BBBE X 0A(A BBBE i 0G(A BBBE i 0G(A BBBE X 0A(A BBBE p 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE p 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE i 0G(A BBBE X 0A(A BBBE i 0G(A BBBE \ 0A(A BBBE  0A(A BBBE i 0G(A BBBE X 0A(A BBBE Zb[Bk 0G(A BBBE p 0G(A BBBE X 0A(A BBBE X 0A(A BBBE  0G(A BBBE  0A(A BBBE X 0A(A BBBE  0A(A BBBE X 0A(A BBBE X 0A(A BBBE X 0A(A BBBI S 0G(A BBBE [ 0G(A BBBE X 0G(A BBBE X 0G(A BBBE X 0G(A BBBE X 0G(A BBBE Lx0 FBB B(A0A8G 8D0A(B BBBA 80BBA A(G (A ABBF 81BBA A(G (A ABBF T@1BBE B(D0A8Dp_xqKxApy 8G0A(B BBBE <1BDD D`e  GABK QhUpRhA`|1OFB E(D0A8DPk 8D0A(B BBBK D8F0A(B BBBEPP8X2/FBF D(D0u (A BBBF 82/FBF D(D0u (A BBBF H2FBB F(D0D8D@s 8D0A(B BBBD H3FBB F(D0D8D@\ 8D0A(B BBBK @h3HFBB A(D0HPa 0A(A BBBH <3YFBB D(D0J (D BBBJ H3BBB B(A0A8DX 8A0A(B BBBH 884BBA A(G (A ABBF 8t4BBA A(G (A ABBF 84BBA A(G (A ABBF 84BBA A(G (A ABBF 8(5BBA A(G (A ABBF 8d5BBA A(G (A ABBF 85BBA A(G (A ABBF 85BBA A(G (A ABBF 86BBA A(G (A ABBF 8T6BBA A(G (A ABBF 86BBA A(G (A ABBF 86BBA A(G (A ABBF 87BBA A(G (A ABBF 8D7BBA A(G (A ABBF 87BBA A(G (A ABBF 87BBA A(G (A ABBF 87BBA A(G (A ABBF 848BBA A(G (A ABBF 8p8BBA A(G (A ABBF 88BBA A(G (A ABBF 88BBA A(G (A ABBF 8$9BBA A(G (A ABBF 8`9BBA A(G (A ABBF 
89BBA A(G (A ABBF 89BBA A(G (A ABBF 8:BBA A(G (A ABBF 8P:BBA A(G (A ABBF 8:BBA A(G (A ABBF 8:BBA A(G (A ABBF 8;BBA A(G (A ABBF 8@;BBA A(G (A ABBF 8|;BBA A(G (A ABBF 8;BBA A(G (A ABBF 8;BBA A(G (A ABBF 80<BBA A(G (A ABBF 8l<BBA A(G (A ABBF 8<BBA A(G (A ABBF 8<BBA A(G (A ABBF 8 =BBA A(G (A ABBF 8\=BBA A(G (A ABBF 8=BBA A(G (A ABBF 8=BBA A(G (A ABBF 8>BBA A(G (A ABBF 8L>BBA A(G (A ABBF 8>BBA A(G (A ABBF 8>BBA A(G (A ABBF 8?BBA A(G (A ABBF 8?@@! AFBC+ DEjFGHpIJyKsLn[M<[NSOR?P$QJR4SS T/UVLWlXBY1)ZT{[u\P]]k^Hc_2g`]a6UbN?cD7d>'e- f(f*hiijMkl.mnopr r#gstuvwyzz||}~ȀfɂÃ&$0z d{m3yya[OQCASk_h><2S06<4O0@)<8W$b%  ,,JD:00 ' *"Qpygekuci]]   "$&(*,.02468:<>@BDFHJLNPRTVXZ\^`bdfhjlnprtvxz|~   !#%')+-/13579;=?ACEGIKMOQSUWY[]qUGZH_HeHkH9qHRwHe}xH<H H*H6H?HHG0HRR(QQQ;QYQoQQQYYY YY Y YYx#x)t/t5y;yAGMSzY_ ekqw$}zHHHHHGhGz G@HGHH GGHOGHgH2GhH HXH& H/GGG%H+H1H7G=HCGPIH~OHsUG[G(aHgHmG`sHyHGHHGHHHGGxz0%?&'/14R5n=>ANBCHIJKOMN?OP/Qie"" "N"}"" ""L"""("j"""4"e""" " O" """0" \" """ " B""*"T"*""*"z"" R" " -" "  " " "["">""/.wY"/"/"""%"pY"!: 5"#Ax"%Q6W!ePu"'"'")%<"+ZX!gq"+Z"-/p"/"1"3"5}"7A"92";i"=ZD"=Zn"?+!_ "?"ABX_"C"E_F"E_"I{U!ch"I{"Kkr"S"W "Z .N2f"Ks5i%@"\b"^"^"`r"`r"bN"bN"c/"cA"eu"eu"g"i!m"i!"j] s   3 _   "jL "l "l "nb  "nbP "pK "pK "r, t"u^ d>"w|"z;9A"|Nv"~L"D 06Zd"I""[""*"_"\'^!o8"@!iT8`""T'c"3 "Dx"]"!m8!k8 "X"!"An      5! M!T!! !! !`"("v""#Y##H$$$$7%m%%%%%%%2&&&B'I q'''(:(( ))))"N*"^x*"G*"R+_+w++++!a ++P+,A,v,,"-"-"f-"Y.|." 
."/"^/"E9/"yD0o00"1" 1"[1"&2"2"3"5 l3"3"h04"Y4"4"xY5"] 5"{=6"6"7"eO7"77"?=8"8"9"9"9"e:":";;";";x;"Gh<"l<" ="Y="U="P>" >"?"?"?"xfH@"Q@"K0A"lA"A<B"IB"cB"ZC"dC"6D"D"oD"kE"E"(F"F"G"G" G"lBHtHHH"H#IKIIIIJRJsJJJJKDKpKKK"/CL"/L"M"rM"HM"YONNN"tIO"v OO"*P"|jP"~]P0QmQQQ"ZRRRRRS"vkS"xS"=GT"T"%U"58U"=U"<jV"=V"=0W"XW"XX"XX"XX"=eY"=Y"=7Z"=Z"=Z"=\["["=8\i\"\\"`]]]]7^k^^^^,_w_"7_"9Q``s`t a)a=alaaa$b\bbbb cFcccdEdydd!udmain.cpp_ZNSt11_Deque_baseIPN5clang4ExprESaIS2_EE17_M_initialize_mapEm.constprop.0_ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St9_IdentityIS5_ESt4lessIS5_ESaIS5_EE8_M_eraseEPSt13_Rb_tree_nodeIS5_E.isra.0_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.constprop.0_ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE8_M_eraseEPSt13_Rb_tree_nodeIS8_E.isra.0_ZNSt8_Rb_treeIN9Generator3TagES1_St9_IdentityIS1_ESt4lessIS1_ESaIS1_EE8_M_eraseEPSt13_Rb_tree_nodeIS1_E.isra.0_ZSt16__insertion_sortIN9__gnu_cxx17__normal_iteratorIPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt6vectorIS7_SaIS7_EEEENS0_5__ops15_Iter_less_iterEEvT_SF_T0_.isra.0_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2IS3_EEPKcRKS3_.constprop.0_ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EE12emplace_backIJS5_EEERS5_DpOT_.isra.0_ZL14proceedCommandSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EEN4llvm9StringRefES9_PN5clang11FileManagerE12DatabaseType_ZSt16__introsort_loopIN9__gnu_cxx17__normal_iteratorIPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt6vectorIS7_SaIS7_EEEElNS0_5__ops15_Iter_less_iterEEvT_SF_T0_T1_.isra.0_ZL13EmbeddedFiles_GLOBAL__sub_I__Z9BuildPathB5cxx11_ZStL8__ioinit_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE12TraverseDeclEPNS_4DeclE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseCapturedDeclEPNS_12CapturedDeclE.part.0_ZN5clang19RecursiveASTVisitorI17Brows
erASTVisitorE25TraverseDeclContextHelperEPNS_11DeclContextE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseExportDeclEPNS_10ExportDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseExternCContextDeclEPNS_18ExternCContextDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseUsingDeclEPNS_9UsingDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseFileScopeAsmDeclEPNS_16FileScopeAsmDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseUsingEnumDeclEPNS_13UsingEnumDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseLinkageSpecDeclEPNS_15LinkageSpecDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseObjCCategoryImplDeclEPNS_20ObjCCategoryImplDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseClassTemplateDeclEPNS_17ClassTemplateDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseStaticAssertDeclEPNS_16StaticAssertDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseTopLevelStmtDeclEPNS_16TopLevelStmtDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseCXXConversionDeclEPNS_17CXXConversionDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseTypeAliasTemplateDeclEPNS_21TypeAliasTemplateDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseBuiltinTemplateDeclEPNS_19BuiltinTemplateDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseMSPropertyDeclEPNS_14MSPropertyDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseObjCImplementationDeclEPNS_22ObjCImplementationDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseFunctionTemplateDeclEPNS_20FunctionTemplateDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseCXXDestructorDeclEPNS_17CXXDestructorDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseCXXConstructorDeclEPNS
_18CXXConstructorDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseVarTemplateDeclEPNS_15VarTemplateDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseConceptDeclEPNS_11ConceptDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseObjCCategoryDeclEPNS_16ObjCCategoryDeclE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseLambdaExprEPNS_10LambdaExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16dataTraverseNodeEPNS_4StmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIS4_Lj1EbNS5_21PointerLikeTypeTraitsIS4_EENS5_18PointerIntPairInfoIS4_Lj1ES9_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseObjCAutoreleasePoolStmtEPNS_23ObjCAutoreleasePoolStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseObjCAtTryStmtEPNS_13ObjCAtTryStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseObjCAtThrowStmtEPNS_15ObjCAtThrowStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseObjCAtFinallyStmtEPNS_17ObjCAtFinallyStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseObjCAtCatchStmtEPNS_15ObjCAtCatchStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEE
E.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseOMPExecutableDirectiveEPNS_22OMPExecutableDirectiveE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPSectionDirectiveEPNS_19OMPSectionDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE37TraverseOMPTargetParallelForDirectiveEPNS_29OMPTargetParallelForDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseOMPTargetParallelDirectiveEPNS_26OMPTargetParallelDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPMasterDirectiveEPNS_18OMPMasterDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseOMPTaskgroupDirectiveEPNS_21OMPTaskgroupDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseOMPParallelMaskedDirectiveEPNS_26OMPParallelMaskedDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPTargetDirectiveEPNS_18OMPTargetDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE36TraverseOMPTeamsGenericLoopD
irectiveEPNS_28OMPTeamsGenericLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE36TraverseOMPParallelSectionsDirectiveEPNS_28OMPParallelSectionsDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseOMPParallelMasterDirectiveEPNS_26OMPParallelMasterDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseOMPScanDirectiveEPNS_16OMPScanDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseOMPTargetExitDataDirectiveEPNS_26OMPTargetExitDataDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE35TraverseOMPTargetEnterDataDirectiveEPNS_27OMPTargetEnterDataDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPSingleDirectiveEPNS_18OMPSingleDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseOMPTargetDataDirectiveEPNS_22OMPTargetDataDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19
RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPSectionsDirectiveEPNS_20OMPSectionsDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPOrderedDirectiveEPNS_19OMPOrderedDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPUnrollDirectiveEPNS_18OMPUnrollDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPParallelDirectiveEPNS_20OMPParallelDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseOMPTargetUpdateDirectiveEPNS_24OMPTargetUpdateDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseOMPMetaDirectiveEPNS_16OMPMetaDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE50TraverseOMPTeamsDistributeParallelForSimdDirectiveEPNS_42OMPTeamsDistributeParallelForSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseOMPTeamsDirectiveEPNS_17OMPTeamsDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPai
rInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPMaskedDirectiveEPNS_18OMPMaskedDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseOMPTaskDirectiveEPNS_16OMPTaskDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE39TraverseOMPTeamsDistributeSimdDirectiveEPNS_31OMPTeamsDistributeSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPTaskwaitDirectiveEPNS_20OMPTaskwaitDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseOMPTileDirectiveEPNS_16OMPTileDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseOMPTargetTeamsDirectiveEPNS_23OMPTargetTeamsDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE46TraverseOMPTeamsDistributeParallelForDirectiveEPNS_38OMPTeamsDistributeParallelForDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE35TraverseOMPTeamsDistributeDirectiveEPNS_27OMPTeamsDistributeDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIP
NS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseCXXDefaultArgExprEPNS_17CXXDefaultArgExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPDispatchDirectiveEPNS_20OMPDispatchDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseGCCAsmStmtEPNS_10GCCAsmStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseWhileStmtEPNS_9WhileStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseAtomicExprEPNS_10AtomicExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16TraverseNullStmtEPNS_8NullStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseCapturedStmtEPNS_12CapturedStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseCXXCatchStmtEPNS_12CXXCatchStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveAST
VisitorI17BrowserASTVisitorE28TraverseCXXBindTemporaryExprEPNS_20CXXBindTemporaryExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPCancelDirectiveEPNS_18OMPCancelDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseLabelStmtEPNS_9LabelStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseUnaryOperatorEPNS_13UnaryOperatorEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseUnaryExprOrTypeTraitExprEPNS_24UnaryExprOrTypeTraitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseArrayInitLoopExprEPNS_17ArrayInitLoopExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseCXXUuidofExprEPNS_13CXXUuidofExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseCXXUnresolv
edConstructExprEPNS_26CXXUnresolvedConstructExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseContinueStmtEPNS_12ContinueStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseCXXConstructExprEPNS_16CXXConstructExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPCriticalDirectiveEPNS_20OMPCriticalDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseMSAsmStmtEPNS_9MSAsmStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseArrayTypeTraitExprEPNS_18ArrayTypeTraitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseCXXNewExprEPNS_10CXXNewExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseCXXOperatorCallExprEPNS_19CXXOperatorCallExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseCXXTryStmtEPNS_10CXXTryStmtEPN4llvm15SmallVectorImplINS5_14Point
erIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseCompoundAssignOperatorEPNS_22CompoundAssignOperatorEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPAtomicDirectiveEPNS_18OMPAtomicDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseSEHLeaveStmtEPNS_12SEHLeaveStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseVAArgExprEPNS_9VAArgExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseAddrLabelExprEPNS_13AddrLabelExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseCXXDeleteExprEPNS_13CXXDeleteExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16TraverseCallExprEPNS_8CallExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseConvertVectorExprEPNS_17ConvertVectorExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1E
SB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseCXXTemporaryObjectExprEPNS_22CXXTemporaryObjectExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseOMPDepobjDirectiveEPNS_18OMPDepobjDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseAttributedStmtEPNS_14AttributedStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseCompoundStmtEPNS_12CompoundStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseAsTypeExprEPNS_10AsTypeExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseMSDependentExistsStmtEPNS_21MSDependentExistsStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseReturnStmtEPNS_10ReturnStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPBarrierDirectiveEPNS_19OMPBarrierDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTV
isitorE18TraverseSEHTryStmtEPNS_10SEHTryStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE37TraverseOMPCancellationPointDirectiveEPNS_29OMPCancellationPointDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseArrayInitIndexExprEPNS_18ArrayInitIndexExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseCXXInheritedCtorInitExprEPNS_24CXXInheritedCtorInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseCXXMemberCallExprEPNS_17CXXMemberCallExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseCXXDefaultInitExprEPNS_18CXXDefaultInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseDefaultStmtEPNS_11DefaultStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseArraySubscriptExprEPNS_18ArraySubscriptExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseBinar
yOperatorEPNS_14BinaryOperatorEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseCXXTypeidExprEPNS_13CXXTypeidExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseCXXThisExprEPNS_11CXXThisExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE33TraverseCXXStdInitializerListExprEPNS_25CXXStdInitializerListExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPTaskLoopDirectiveEPNS_20OMPTaskLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseObjCBoolLiteralExprEPNS_19ObjCBoolLiteralExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseBuiltinBitCastExprEPNS_18BuiltinBitCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseUnresolvedMemberExprEPNS_20UnresolvedMemberExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseOMPParallelForDirectiveEPNS_23OMPPara
llelForDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseCXXNullPtrLiteralExprEPNS_21CXXNullPtrLiteralExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseCXXReinterpretCastExprEPNS_22CXXReinterpretCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseCXXDynamicCastExprEPNS_18CXXDynamicCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseSYCLUniqueStableNameExprEPNS_24SYCLUniqueStableNameExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE41TraverseOMPTargetParallelForSimdDirectiveEPNS_33OMPTargetParallelForSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseMatrixSubscriptExprEPNS_19MatrixSubscriptExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseObjCPropertyRefExprEPNS_19ObjCPropertyRefExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE3
4TraverseOMPDistributeSimdDirectiveEPNS_26OMPDistributeSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseExtVectorElementExprEPNS_20ExtVectorElementExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseCXXScalarValueInitExprEPNS_22CXXScalarValueInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16TraverseStmtExprEPNS_8StmtExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE52TraverseOMPTargetTeamsDistributeParallelForDirectiveEPNS_44OMPTargetTeamsDistributeParallelForDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPArrayShapingExprEPNS_19OMPArrayShapingExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseObjCBridgedCastExprEPNS_19ObjCBridgedCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseOMPMaskedTaskLoopDirectiveEPNS_26OMPMaskedTaskLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.par
t.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPForSimdDirectiveEPNS_19OMPForSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseExprWithCleanupsEPNS_16ExprWithCleanupsEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseOMPForDirectiveEPNS_15OMPForDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseCXXConstCastExprEPNS_16CXXConstCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraversePredefinedExprEPNS_14PredefinedExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE46TraverseOMPParallelMaskedTaskLoopSimdDirectiveEPNS_38OMPParallelMaskedTaskLoopSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE15TraverseForStmtEPNS_7ForStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPInteropDirectiveEPNS_19OMPInteropDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveAS
TVisitorI17BrowserASTVisitorE25TraverseOMPErrorDirectiveEPNS_17OMPErrorDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16TraverseTypoExprEPNS_8TypoExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseCXXThrowExprEPNS_12CXXThrowExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE36TraverseSubstNonTypeTemplateParmExprEPNS_28SubstNonTypeTemplateParmExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE45TraverseOMPTargetTeamsDistributeSimdDirectiveEPNS_37OMPTargetTeamsDistributeSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseObjCArrayLiteralEPNS_16ObjCArrayLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseCharacterLiteralEPNS_16CharacterLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseOMPMasterTaskLoopDirectiveEPNS_26OMPMasterTaskLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19Recu
rsiveASTVisitorI17BrowserASTVisitorE38TraverseOMPMasterTaskLoopSimdDirectiveEPNS_30OMPMasterTaskLoopSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseGNUNullExprEPNS_11GNUNullExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseCXXNoexceptExprEPNS_15CXXNoexceptExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseRecoveryExprEPNS_12RecoveryExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE46TraverseOMPParallelMasterTaskLoopSimdDirectiveEPNS_38OMPParallelMasterTaskLoopSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseMSPropertySubscriptExprEPNS_23MSPropertySubscriptExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE14TraverseIfStmtEPNS_6IfStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseObjCIvarRefExprEPNS_15ObjCIvarRefExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisit
orI17BrowserASTVisitorE41TraverseOMPDistributeParallelForDirectiveEPNS_33OMPDistributeParallelForDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseDesignatedInitUpdateExprEPNS_24DesignatedInitUpdateExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseCXXPseudoDestructorExprEPNS_23CXXPseudoDestructorExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseSizeOfPackExprEPNS_14SizeOfPackExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseOMPTargetSimdDirectiveEPNS_22OMPTargetSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseNoInitExprEPNS_10NoInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseCompoundLiteralExprEPNS_19CompoundLiteralExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseObjCSelectorExprEPNS_16ObjCSelectorExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19Recursi
veASTVisitorI17BrowserASTVisitorE31TraverseOMPGenericLoopDirectiveEPNS_23OMPGenericLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseFloatingLiteralEPNS_15FloatingLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseCXXFunctionalCastExprEPNS_21CXXFunctionalCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE39TraverseOMPParallelGenericLoopDirectiveEPNS_31OMPParallelGenericLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE42TraverseOMPParallelMaskedTaskLoopDirectiveEPNS_34OMPParallelMaskedTaskLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseSynOrSemInitListExprEPNS_12InitListExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE14TraverseDoStmtEPNS_6DoStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseObjCDictionaryLiteralEPNS_21ObjCDictionaryLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isr
a.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseOMPFlushDirectiveEPNS_17OMPFlushDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseDeclRefExprEPNS_11DeclRefExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE40TraverseSubstNonTypeTemplateParmPackExprEPNS_32SubstNonTypeTemplateParmPackExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseStringLiteralEPNS_13StringLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE42TraverseOMPTargetTeamsGenericLoopDirectiveEPNS_34OMPTargetTeamsGenericLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE33TraverseObjCAvailabilityCheckExprEPNS_25ObjCAvailabilityCheckExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseUserDefinedLiteralEPNS_18UserDefinedLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseUnresolvedLookupExprEPNS_20UnresolvedLookupExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntP
airInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE38TraverseOMPMaskedTaskLoopSimdDirectiveEPNS_30OMPMaskedTaskLoopSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseGenericSelectionExprEPNS_20GenericSelectionExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseCXXAddrspaceCastExprEPNS_20CXXAddrspaceCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseRequiresExprEPNS_12RequiresExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseOMPSimdDirectiveEPNS_16OMPSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseMaterializeTemporaryExprEPNS_24MaterializeTemporaryExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseIndirectGotoStmtEPNS_16IndirectGotoStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseObjCMessageExprEPNS_15ObjCMessageExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInf
oIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE45TraverseOMPDistributeParallelForSimdDirectiveEPNS_37OMPDistributeParallelForSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseExpressionTraitExprEPNS_19ExpressionTraitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseCXXParenListInitExprEPNS_20CXXParenListInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE41TraverseOMPTargetTeamsDistributeDirectiveEPNS_33OMPTargetTeamsDistributeDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE45TraverseOMPTargetParallelGenericLoopDirectiveEPNS_37OMPTargetParallelGenericLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPArraySectionExprEPNS_19OMPArraySectionExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseCXXStaticCastExprEPNS_17CXXStaticCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseObjCStringLiteralEPNS_17ObjCStringLiteralEPN4llvm15SmallVectorImplINS5_14
PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseOMPDistributeDirectiveEPNS_22OMPDistributeDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseConstantExprEPNS_12ConstantExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseCStyleCastExprEPNS_14CStyleCastExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseParenListExprEPNS_13ParenListExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE35TraverseOMPParallelForSimdDirectiveEPNS_27OMPParallelForSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseIntegerLiteralEPNS_14IntegerLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseOMPTaskLoopSimdDirectiveEPNS_24OMPTaskLoopSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseObjCEncodeExprEPNS_14ObjCEncodeExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS
5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE56TraverseOMPTargetTeamsDistributeParallelForSimdDirectiveEPNS_48OMPTargetTeamsDistributeParallelForSimdDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseShuffleVectorExprEPNS_17ShuffleVectorExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE42TraverseOMPParallelMasterTaskLoopDirectiveEPNS_34OMPParallelMasterTaskLoopDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseOMPIteratorExprEPNS_15OMPIteratorExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16TraverseGotoStmtEPNS_8GotoStmtEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseOffsetOfExprEPNS_12OffsetOfExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseChooseExprEPNS_10ChooseExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseFunctionParmPackExprEPNS_20FunctionParmPackExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPN
S_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseMSPropertyRefExprEPNS_17MSPropertyRefExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseObjCIsaExprEPNS_11ObjCIsaExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseDesignatedInitExprEPNS_18DesignatedInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseMemberExprEPNS_10MemberExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseObjCProtocolExprEPNS_16ObjCProtocolExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseFixedPointLiteralEPNS_17FixedPointLiteralEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseObjCBoxedExprEPNS_13ObjCBoxedExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.isra.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseOMPTaskyieldDirectiveEPNS_21OMPTaskyieldDirectiveEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPair
InfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE33TraverseDependentScopeDeclRefExprEPNS_25DependentScopeDeclRefExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE35TraverseCXXDependentScopeMemberExprEPNS_27CXXDependentScopeMemberExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseTypeTraitExprEPNS_13TypeTraitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE33TraverseConceptSpecializationExprEPNS_25ConceptSpecializationExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseCXXConstructExprEPNS_16CXXConstructExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseCXXTemporaryObjectExprEPNS_22CXXTemporaryObjectExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseDesignatedInitExprEPNS_18DesignatedInitExprEPN4llvm15SmallVectorImplINS5_14PointerIntPairIPNS_4StmtELj1EbNS5_21PointerLikeTypeTraitsIS9_EENS5_18PointerIntPairInfoIS9_Lj1ESB_EEEEEE.part.0.LC0.LC1.LC22.LC23.LC35.LC39.LC40.LC41.LC36.LC32.LC33.LC34.LC37.LC38.LC42.LC31.LC30.LC10.LC11.LC2.LC3.LC4.LC5.LC6.LC7.LC8.LC9.LC17.LC13.LC15.LC16.LC18.LC14.LC19.LC12.LC20.LC21.LC24.LC25.LC26.LC28.LC27.LC
29.LC43.LC44.LC45.LC46.LC47.LC48.LC49.LC50.LC51.LC52.LC53.LC55.LC56.LC57.LC58.LC62.LC68.LC69.LC72.LC66.LC71.LC73.LC74.LC75.LC81.LC83.LC84.LC80.LC70.LC77.LC82.LC78.LC79.LC63.LC64.LC67.LC54.LC65.LC61.LC60.LC59.LC86.LC88.LC87.LC89.LC90.LC92.LC91.LC93.LC95.LC94.LC96.LC98.LC97.LC99.LC101.LC100.LC102.LC103.LC104.LC107.LC106.LC108.LC109_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EED5Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED5Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED5Ev_ZN13BrowserActionD5Ev_ZN23BrowserDiagnosticClientD5Ev_ZN18BrowserASTConsumerD5Ev_ZN4llvm2cl3optIbLb0ENS0_6parserIbEEED5Ev_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEED5Ev_ZNSt3setINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4lessIS5_ESaIS5_EED5Ev_ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEED5Ev_ZN9GeneratorD5Ev_ZN14ProjectManagerD5Ev_ZN11ProjectInfoD5Ev_ZN11ProjectInfoC5EOS__ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EED5Ev_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEED5Ev_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEEC5IJA2_cNS0_10value_descENS0_4descENS0_18NumOccurrencesFlagEEEEDpRKT__ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEEC5IJA2_cNS0_10value_descENS0_4descENS0_18NumOccurrencesFlagEEEEDpRKT__ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EEC5ERKS7__ZNSt6vectorIN5clang7tooling14CompileCommandESaIS2_EED5Ev_ZN4llvm3sys2fs28recursive_directory_iteratorC5ERKNS_5TwineERSt10error_codeb_ZNKSt5ctypeIcE8do_widenEc_ZN5clang18DiagnosticConsumer5clearEv_ZN5clang18DiagnosticConsumer15BeginSourceFileERKNS_11LangOptionsEPKNS_12PreprocessorE_ZN5clang18DiagnosticConsumer13EndSou
rceFileEv_ZN5clang18DiagnosticConsumer6finishEv_ZNK5clang9BlockDecl7getBodyEv_ZN5clang11ASTConsumer30HandleInlineFunctionDefinitionEPNS_12FunctionDeclE_ZN5clang11ASTConsumer23HandleTagDeclDefinitionEPNS_7TagDeclE_ZN5clang11ASTConsumer31HandleTagDeclRequiredDefinitionEPKNS_7TagDeclE_ZN5clang11ASTConsumer38HandleCXXImplicitFunctionInstantiationEPNS_12FunctionDeclE_ZN5clang11ASTConsumer27CompleteTentativeDefinitionEPNS_7VarDeclE_ZN5clang11ASTConsumer27CompleteExternalDeclarationEPNS_7VarDeclE_ZN5clang11ASTConsumer22AssignInheritanceModelEPNS_13CXXRecordDeclE_ZN5clang11ASTConsumer37HandleCXXStaticMemberVarInstantiationEPNS_7VarDeclE_ZN5clang11ASTConsumer12HandleVTableEPNS_13CXXRecordDeclE_ZN5clang11ASTConsumer22GetASTMutationListenerEv_ZN5clang11ASTConsumer29GetASTDeserializationListenerEv_ZN5clang11ASTConsumer10PrintStatsEv_ZN5clang14FrontendAction22PrepareToExecuteActionERNS_16CompilerInstanceE_ZN5clang14FrontendAction15BeginInvocationERNS_16CompilerInstanceE_ZN5clang14FrontendAction21BeginSourceFileActionERNS_16CompilerInstanceE_ZN5clang14FrontendAction19EndSourceFileActionEv_ZNK5clang14FrontendAction20isModelParsingActionEv_ZN5clang14FrontendAction22getTranslationUnitKindEv_ZNK5clang14FrontendAction13hasPCHSupportEv_ZNK5clang14FrontendAction17hasASTFileSupportEv_ZNK5clang14FrontendAction12hasIRSupportEv_ZNK5clang17ASTFrontendAction20usesPreprocessorOnlyEv_ZNK5clang14ObjCMethodDecl7hasBodyEv_ZNK13BrowserAction24hasCodeCompletionSupportEv_ZNSt17_Function_handlerIFvRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEN4llvm2cl3optIS5_Lb0ENSA_6parserIS5_EEE8CallbackMUlS7_E_EE9_M_invokeERKSt9_Any_dataS7__ZNSt17_Function_handlerIFvRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEN4llvm2cl3optIS5_Lb0ENSA_6parserIS5_EEE8CallbackMUlS7_E_EE10_M_managerERSt9_Any_dataRKSH_St18_Manager_operation_ZNSt17_Function_handlerIFvRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEN4llvm2cl4listIS5_bNSA_6parserIS5_EEE8CallbackMUlS7_E_EE9_M_invokeERKSt9_Any_dataS7__ZNSt17_Funct
ion_handlerIFvRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEN4llvm2cl4listIS5_bNSA_6parserIS5_EEE8CallbackMUlS7_E_EE10_M_managerERSt9_Any_dataRKSH_St18_Manager_operation_ZNSt17_Function_handlerIFvRKbEN4llvm2cl3optIbLb0ENS4_6parserIbEEE8CallbackMUlS1_E_EE9_M_invokeERKSt9_Any_dataS1__ZNSt17_Function_handlerIFvRKbEN4llvm2cl3optIbLb0ENS4_6parserIbEEE8CallbackMUlS1_E_EE10_M_managerERSt9_Any_dataRKSB_St18_Manager_operation_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EED2Ev_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EED1Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED2Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED1Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED2Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED1Ev_ZNK4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE27getValueExpectedFlagDefaultEv_ZNK4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE16printOptionValueEmb_ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE19getExtraOptionNamesERNS_15SmallVectorImplINS_9StringRefEEE_ZNK4llvm2cl15OptionValueCopyIbE7compareERKNS0_18GenericOptionValueE_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED0Ev_ZdlPvm_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EED0Ev_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EED0Ev_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE14_M_get_deleterERKSt9type_info_ZZNSt19_Sp_make_shar
ed_tag5_S_tiEvE5__tag_ZNSt19_Sp_make_shared_tag5_S_eqERKSt9type_info_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE14_M_get_deleterERKSt9type_info_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EE14_M_get_deleterERKSt9type_info_ZN13BrowserActionD2Ev_ZTVN5clang17ASTFrontendActionE_ZN5clang14FrontendActionD2Ev_ZN13BrowserActionD1Ev_ZN13BrowserActionD0Ev_ZN18BrowserASTConsumer22shouldSkipFunctionBodyEPN5clang4DeclE_ZNK5clang13FullSourceLoc15getExpansionLocEv_ZNK5clang13FullSourceLoc9getFileIDEv_ZN9Annotator13shouldProcessEN5clang6FileIDE__stack_chk_fail_ZNK5clang12FunctionDecl7getBodyEv_ZNK5clang12FunctionDecl7getBodyERPKS0__ZN18BrowserASTConsumer18HandleTopLevelDeclEN5clang12DeclGroupRefE_ZN5clang17DiagnosticsEngine5ResetEb_ZN18BrowserASTConsumer10InitializeERN5clang10ASTContextE_ZN5clang10ASTContext19createMangleContextEPKNS_10TargetInfoE_Znwm_ZTV20PreprocessorCallback_ZTVN5clang18PPChainedCallbacksE_ZTV23BrowserDiagnosticClient_ZN5clang17DiagnosticsEngine9setClientEPNS_18DiagnosticConsumerEb_ZN23BrowserDiagnosticClientD2Ev_ZN5clang18DiagnosticConsumerD2Ev_ZN23BrowserDiagnosticClientD1Ev_ZN23BrowserDiagnosticClientD0Ev_ZN18BrowserASTConsumerD2Ev_ZTV18BrowserASTConsumer_ZTVN5clang20IgnoringDiagConsumerE_ZN9AnnotatorD1Ev_ZN18BrowserASTConsumerD1Ev_ZNK4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE15printOptionInfoEm_ZNK4llvm2cl17basic_parser_impl15printOptionInfoERKNS0_6OptionEm_ZNK4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE14getOptionWidthEv_ZNK4llvm2cl17basic_parser_impl14getOptionWidthERKNS0_6OptionE_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EE10_M_destroyEv_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE10_M_destroyEv_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail1
2DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE10_M_destroyEv_ZNSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EE10_M_disposeEvfree_ZN4llvm17deallocate_bufferEPvmm_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE10_M_disposeEv_ZN4llvm3sys2fs6detail27directory_iterator_destructERNS2_12DirIterStateE_ZN18BrowserASTConsumerD0Ev_ZN4llvm2cl3optIbLb0ENS0_6parserIbEEED2Ev_ZTVN4llvm2cl3optIbLb0ENS0_6parserIbEEEE_ZN4llvm2cl3optIbLb0ENS0_6parserIbEEED1Ev_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEED2Ev_ZTVN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEEE_ZTVN4llvm2cl15OptionValueCopyINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEE_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEED1Ev_ZNK4llvm2cl15OptionValueCopyINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE7compareERKNS0_18GenericOptionValueEmemcmp_ZNSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE10_M_disposeEv__libc_single_threaded_ZNSt3setINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4lessIS5_ESaIS5_EED2Ev_ZNSt3setINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4lessIS5_ESaIS5_EED1Evmemcpy_ZSt19__throw_logic_errorPKc_ZSt17__throw_bad_allocv_ZSt20__throw_length_errorPKc_ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEED2Ev_ZTVN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEEE_ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEED1Ev_ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEED0Evstrlen_ZN5clang23ConstructionContextItem15getKindAsStringENS0_8ItemKindE_ZNK5clang23ConstructionContextItem15getKindAsStringEv_ZN17BrowserASTVisitor17VisitFunctionDeclEPN5clang12FunctionDe
clE_ZTVN4llvm18raw_string_ostreamE_ZN4llvm11raw_ostream16SetBufferAndModeEPcmNS0_10BufferKindE_ZNK5clang13CXXMethodDecl8isStaticEv_ZNK5clang13CXXMethodDecl24begin_overridden_methodsEv_ZN9Annotator16registerOverrideEPN5clang9NamedDeclES2_NS0_14SourceLocationE_ZNK5clang13CXXMethodDecl22end_overridden_methodsEv_ZNK5clang4Type27getUnqualifiedDesugaredTypeEv_ZNK9Annotator10getTypeRefB5cxx11EN5clang8QualTypeE_ZN4llvm11raw_ostream5writeEPKcm_ZNK5clang9NamedDecl24getQualifiedNameAsStringB5cxx11Ev_ZNK5clang11ParmVarDecl13hasDefaultArgEv_ZN5clang11ParmVarDecl13getDefaultArgEv_ZNK5clang4Stmt11printPrettyERN4llvm11raw_ostreamEPNS_13PrinterHelperERKNS_14PrintingPolicyEjNS1_9StringRefEPKNS_10ASTContextE_ZNK5clang12FunctionDecl12getNumParamsEv_ZN4llvm11raw_ostreamD2Ev_ZNK5clang19DeclarationNameInfo16getEndLocPrivateEv_ZN9Annotator17registerReferenceEPN5clang9NamedDeclENS0_11SourceRangeENS_9TokenTypeENS_8DeclTypeENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES2__ZNK5clang4Decl15hasDefiningAttrEv_ZNK5clang4Decl8getAttrsEv_ZN17BrowserASTVisitor8classifyEv_ZN5clang4Expr25getReferencedDeclOfCalleeEv_ZN14ProjectManagerD2Evmemset_ZN14ProjectManagerD1Ev_ZN11ProjectInfoD2Ev_ZN11ProjectInfoD1Ev_ZN11ProjectInfoC2EOS__ZN11ProjectInfoC1EOS__ZN9GeneratorD2Ev_ZN9GeneratorD1Ev_ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EED2Ev_ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EED1Ev_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEED2Ev_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEED1Ev_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEEC2IJA2_cNS0_10value_descENS0_4descENS0_18NumOccurrencesFlagEEEEDpRKT__ZTVN4llvm2cl6OptionE_ZN4llvm2cl18getGeneralCategoryEv_ZTVN4llvm2cl11OptionValueINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEE_ZTVN4llvm2cl6parserINSt7__cxx1112basic_stringIcSt11char_tr
aitsIcESaIcEEEEE_ZN4llvm2cl6Option9setArgStrENS_9StringRefE_ZN4llvm2cl6Option11addArgumentEv_ZN4llvm15SmallVectorBaseIjE8grow_podEPvmm_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEEC1IJA2_cNS0_10value_descENS0_4descENS0_18NumOccurrencesFlagEEEEDpRKT__ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEEC2IJA2_cNS0_10value_descENS0_4descENS0_18NumOccurrencesFlagEEEEDpRKT__ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEEC1IJA2_cNS0_10value_descENS0_4descENS0_18NumOccurrencesFlagEEEEDpRKT__ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EEC2ERKS7__ZSt28__throw_bad_array_new_lengthv_ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EEC1ERKS7__ZNSt6vectorIN5clang7tooling14CompileCommandESaIS2_EED2Ev_ZNSt6vectorIN5clang7tooling14CompileCommandESaIS2_EED1Ev_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag_Z16locationToStringB5cxx11N5clang14SourceLocationERNS_13SourceManagerE_ZNK5clang13SourceManager14getPresumedLocENS_14SourceLocationEb_ZNK4llvm5Twine3strB5cxx11Ev_ZN17BrowserASTVisitor13VisitCallExprEPN5clang8CallExprE_ZN9Annotator18getParamNameForArgB5cxx11EPN5clang8CallExprEPNS0_11ParmVarDeclEPNS0_4ExprE_ZNK5clang4Stmt11getBeginLocEv_ZN9Annotator12addInlayHintEN5clang14SourceLocationENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN9QtSupport13visitCallExprEPN5clang8CallExprE_ZNK5clang4Stmt14getSourceRangeEv_ZN9Annotator19annotateSourceRangeEN5clang11SourceRangeENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7__ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE10_M_releaseEv_ZNSt5dequeIPN5clang4ExprESaIS2_EE17_M_push_front_auxIJRKS2_EEEvDpOT_memmove_ZN4llvm23SmallVectorTemplateBaseIcLb1EE9push_backEc_ZN4llvm15SmallVectorBaseImE8grow_podEPvmm_ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St9_IdentityIS5_ESt4lessIS5_ESaIS5_EE16
_M_insert_uniqueIS5_EESt4pairISt17_Rb_tree_iteratorIS5_EbEOT__ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS__ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base_ZN13BrowserAction17CreateASTConsumerERN5clang16CompilerInstanceEN4llvm9StringRefE_ZN13BrowserAction9processedB5cxx11E_ZN13BrowserAction14projectManagerE_ZSt4cerr_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l_ZNSo3putEc_ZNSo5flushEv_ZNKSt5ctypeIcE13_M_widen_initEv_ZSt16__throw_bad_castv_ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EE17_M_realloc_insertIJRKS5_EEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT__ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EE9push_backERKS5__ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE10setDefaultEv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm_ZNK14string_builderIS_IS_IS_IS_IA18_cNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEA6_cEA15_cES6_EA5_cEcvS6_Ev_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4__ZNSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS5_EE17_M_realloc_insertIJS5_EEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT__ZN4llvm3sys2fs6accessERKNS_5TwineENS1_10AccessModeE_ZN5clang7tooling26getClangSyntaxOnlyAdjusterB5cxx11Ev_ZN5clang7tooling27getClangStripOutputAdjusterB5cxx11Ev_ZTVSt23_Sp_counted_ptr_inplaceIN5clang22PCHContainerOperationsESaIS1_ELN9__gnu_cxx12_Lock_policyE2EE_ZN5clang22PCHContainerOperationsC1Ev_ZN5clang14FrontendActionC2Ev_ZTV13BrowserAction_ZN5clang7tooling14ToolInvocationC1ESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS8_EESt10unique_ptrINS_14FrontendActionESt14default_deleteISC_EEPNS_11FileManagerESt10shared_ptrINS_22PCHContainerOperationsEE_ZN5clang7tooling14ToolInvocation3runEv_ZN5clang7tooling14ToolInvocationD1Ev_ZSt25__throw_bad_function_callv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm_ZN23BrowserD
iagnosticClient16HandleDiagnosticEN5clang17DiagnosticsEngine5LevelERKNS0_10DiagnosticE_ZNK5clang10Diagnostic16FormatDiagnosticERN4llvm15SmallVectorImplIcEE_ZNK4llvm9StringRef4findES0_m_ZN9Annotator16reportDiagnosticEN5clang11SourceRangeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9__ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate_ZN5clang25LazyGenerationalUpdatePtrIPKNS_4DeclEPS1_XadL_ZNS_17ExternalASTSource19CompleteRedeclChainES3_EEE9makeValueERKNS_10ASTContextES4__ZN4llvm15allocate_bufferEmm_ZN17BrowserASTVisitor12VisitTagDeclEPN5clang7TagDeclE_ZN5clang8QualType27getSplitUnqualifiedTypeImplES0__ZNK5clang4Type18getAsCXXRecordDeclEv_ZNK5clang13CXXRecordDecl14DefinitionData16getBasesSlowCaseEv_ZNSt6vectorIN4llvm3sys2fs18directory_iteratorESaIS3_EE17_M_realloc_insertIJS3_EEEvN9__gnu_cxx17__normal_iteratorIPS3_S5_EEDpOT__ZN4llvm3sys2fs28recursive_directory_iteratorC2ERKNS_5TwineERSt10error_codeb_ZTVSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail15RecDirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE_ZTVSt23_Sp_counted_ptr_inplaceIN4llvm3sys2fs6detail12DirIterStateESaIS4_ELN9__gnu_cxx12_Lock_policyE2EE_ZNK4llvm5Twine8toVectorERNS_15SmallVectorImplIcEE_ZN4llvm3sys2fs6detail28directory_iterator_constructERNS2_12DirIterStateENS_9StringRefEb_ZN4llvm3sys2fs28recursive_directory_iteratorC1ERKNS_5TwineERSt10error_codeb_ZNK4llvm24ThreadSafeRefCountedBaseINS_3vfs10FileSystemEE7ReleaseEv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4swapERS4_main_ZN5clang7tooling24FixedCompilationDatabase19loadFromCommandLineERiPKPKcRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKN4llvm5TwineE_ZN4llvm2cl23ParseCommandLineOptionsEiPKPKcNS_9StringRefEPNS_11raw_ostreamES2_b_Z8DataPathB5cxx11_Z10OutputPathB5cxx11_ZN14ProjectManagerC1ENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5__Z12ProjectPathsB5cxx11memchr_ZN14ProjectManager10addProjectE11ProjectInfo_Z20ExternalProjectPathsB5cxx11_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6__Z11SourcePaths
B5cxx11ProcessAllSources_ZN4llvm3vfs17getRealFileSystemEv_ZN4llvm3vfs17OverlayFileSystemC1ENS_18IntrusiveRefCntPtrINS0_10FileSystemEEE_ZN5clang11FileManagerC1ERKNS_17FileSystemOptionsEN4llvm18IntrusiveRefCntPtrINS4_3vfs10FileSystemEEE_ZN4llvm3vfs18InMemoryFileSystemC1Eb_ZN4llvm3vfs17OverlayFileSystem11pushOverlayENS_18IntrusiveRefCntPtrINS0_10FileSystemEEE_ZN4llvm12MemoryBuffer16getMemBufferCopyENS_9StringRefERKNS_5TwineE_ZN4llvm3vfs18InMemoryFileSystem7addFileERKNS_5TwineElSt10unique_ptrINS_12MemoryBufferESt14default_deleteIS6_EESt8optionalIjESB_SA_INS_3sys2fs9file_typeEESA_INSD_5permsEE_ZN5clang7tooling15getAbsolutePathB5cxx11EN4llvm9StringRefE_Z12canonicalizeRKN4llvm5TwineERNS_15SmallVectorImplIcEE_ZN14ProjectManager14projectForFileEN4llvm9StringRefE_ZNK14ProjectManager13shouldProcessEN4llvm9StringRefEP11ProjectInfo_ZN4llvm3sys4path9extensionENS_9StringRefENS1_5StyleE_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc_ZNSo9_M_insertImEERSoT__ZN5clang11FileManagerD1Evlocaltimestrftime_ZN4llvm12MemoryBuffer7getFileERKNS_5TwineEbbbSt8optionalINS_5AlignEE_ZN9Generator8generateEN4llvm9StringRefENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS7_PKcSB_S1_S1_RKSt3setIS7_St4lessIS7_ESaIS7_EERK11ProjectInfo_ZNSt14basic_ofstreamIcSt11char_traitsIcEEC1Ev_ZNSt13basic_filebufIcSt11char_traitsIcEE4openEPKcSt13_Ios_Openmode_ZNSt14basic_ofstreamIcSt11char_traitsIcEED1Ev_ZN4llvm3sys2fs12is_directoryERKNS_5TwineERb_ZN4llvm3sys4path6nativeERKNS_5TwineERNS_15SmallVectorImplIcEENS1_5StyleE_ZNSt3_V215system_categoryEv_ZN4llvm3sys4path8filenameENS_9StringRefENS1_5StyleE_ZN4llvm3sys2fs6detail28directory_iterator_incrementERNS2_12DirIterStateE_ZN5clang7tooling23JSONCompilationDatabase12loadFromFileEN4llvm9StringRefERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_21JSONCommandLineSyntaxE_ZN5clang7tooling19CompilationDatabase17loadFromDirectoryEN4llvm9StringRefERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZNK4llvm3sys2fs15directory_entry6statusEv_ZSt24__throw_ou
t_of_range_fmtPKcz_ZN4llvm2cl3optINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEELb0ENS0_6parserIS7_EEED0Ev_ZN4llvm2cl3optIbLb0ENS0_6parserIbEEED0Ev_ZNSt6vectorIjSaIjEE17_M_realloc_insertIJRKjEEEvN9__gnu_cxx17__normal_iteratorIPjS1_EEDpOT__ZN4llvm2cl4listINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEbNS0_6parserIS7_EEE16handleOccurrenceEjNS_9StringRefESB___dso_handle_ZNSt8ios_base4InitC1Ev_GLOBAL_OFFSET_TABLE__ZNSt8ios_base4InitD1Ev__cxa_atexit_ZTVN4llvm2cl11OptionValueIbEE_ZTVN4llvm2cl6parserIbEEextra_ZN4llvm2cl9extrahelpC1ENS_9StringRefE_ZNK5clang19NestedNameSpecifier7getKindEv_ZNK5clang22NestedNameSpecifierLoc14getSourceRangeEv_ZNK5clang19NestedNameSpecifier19getAsNamespaceAliasEv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseTemplateArgumentLocERKNS_19TemplateArgumentLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseNestedNameSpecifierLocENS_22NestedNameSpecifierLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseTranslationUnitDeclEPNS_19TranslationUnitDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseAccessSpecDeclEPNS_14AccessSpecDeclE_ZNK5clang12CapturedDecl7getBodyEv_ZN17BrowserASTVisitor12TraverseStmtEPN5clang4StmtE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseBlockDeclEPNS_9BlockDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseEmptyDeclEPNS_9EmptyDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE44TraverseClassScopeFunctionSpecializationDeclEPNS_36ClassScopeFunctionSpecializationDeclE_ZN17BrowserASTVisitor30TraverseNestedNameSpecifierLocEN5clang22NestedNameSpecifierLocE_ZN5clang11DeclContext7classofEPKNS_4DeclE_ZN5clang4Decl17castToDeclContextEPKS0__ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE41TraverseImplicitConceptSpecializationDeclEPNS_33ImplicitConceptSpecializationDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseNamespaceDeclEPNS_13NamespaceDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseFriendDeclEPNS_
10FriendDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE12TraverseAttrEPNS_4AttrE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE37TraverseLifetimeExtendedTemporaryDeclEPNS_29LifetimeExtendedTemporaryDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseObjCCompatibleAliasDeclEPNS_23ObjCCompatibleAliasDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE15TraverseTypeLocENS_7TypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseImportDeclEPNS_10ImportDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseNamespaceAliasDeclEPNS_18NamespaceAliasDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26TraverseFriendTemplateDeclEPNS_18FriendTemplateDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseLabelDeclEPNS_9LabelDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseObjCTypeParamDeclEPNS_17ObjCTypeParamDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseFunctionHelperEPNS_12FunctionDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE39TraverseClassTemplateSpecializationDeclEPNS_31ClassTemplateSpecializationDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseIndirectFieldDeclEPNS_17IndirectFieldDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE35TraverseTemplateParameterListHelperEPNS_21TemplateParameterListE_ZN17BrowserASTVisitor12TraverseDeclEPN5clang4DeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE15TraverseVarDeclEPNS_7VarDeclE_ZN5clang18UsingDirectiveDecl21getNominatedNamespaceEv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseOMPRequiresDeclEPNS_15OMPRequiresDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseOMPThreadPrivateDeclEPNS_20OMPThreadPrivateDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseObjCPropertyImplDeclEPNS_20ObjCPropertyImplDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraversePragmaCommentDeclEPNS_17PragmaCommentDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisi
torE32TraversePragmaDetectMismatchDeclEPNS_24PragmaDetectMismatchDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseRequiresExprBodyDeclEPNS_20RequiresExprBodyDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseTemplateParamObjectDeclEPNS_23TemplateParamObjectDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraverseObjCMethodDeclEPNS_14ObjCMethodDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseTypeAliasDeclEPNS_13TypeAliasDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseOMPClauseEPNS_9OMPClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseTemplateTemplateParmDeclEPNS_24TemplateTemplateParmDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseParmVarDeclEPNS_11ParmVarDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseBindingDeclEPNS_11BindingDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseObjCInterfaceDeclEPNS_17ObjCInterfaceDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseTemplateTypeParmDeclEPNS_20TemplateTypeParmDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseMSGuidDeclEPNS_10MSGuidDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseImplicitParamDeclEPNS_17ImplicitParamDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseUsingShadowDeclEPNS_15UsingShadowDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE33TraverseUnnamedGlobalConstantDeclEPNS_25UnnamedGlobalConstantDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseDeclaratorHelperEPNS_14DeclaratorDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE12TraverseTypeENS_8QualTypeE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE18TraverseRecordDeclEPNS_10RecordDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE44TraverseVarTemplatePartialSpecializationDeclEPNS_36VarTemplatePartialSpecializationDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseObjCAtDefsFieldDeclEPNS_19ObjCAtDefsFi
eldDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseOMPAllocateDeclEPNS_15OMPAllocateDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE46TraverseClassTemplatePartialSpecializationDeclEPNS_38ClassTemplatePartialSpecializationDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseDecompositionDeclEPNS_17DecompositionDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseUsingPackDeclEPNS_13UsingPackDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE37TraverseVarTemplateSpecializationDeclEPNS_29VarTemplateSpecializationDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseTypedefDeclEPNS_11TypedefDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseOMPCapturedExprDeclEPNS_19OMPCapturedExprDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE16TraverseEnumDeclEPNS_8EnumDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseNonTypeTemplateParmDeclEPNS_23NonTypeTemplateParmDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE17TraverseFieldDeclEPNS_9FieldDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseEnumConstantDeclEPNS_16EnumConstantDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseObjCProtocolDeclEPNS_16ObjCProtocolDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseConstructorUsingShadowDeclEPNS_26ConstructorUsingShadowDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE35TraverseUnresolvedUsingIfExistsDeclEPNS_27UnresolvedUsingIfExistsDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseCXXRecordDeclEPNS_13CXXRecordDeclE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseObjCIvarDeclEPNS_12ObjCIvarDeclE_ZNK5clang19NestedNameSpecifier14getAsNamespaceEv_ZN5clang7VarDecl7getInitEv_ZNK5clang11DeclContext11decls_beginEv_ZN18BrowserASTConsumer21HandleTranslationUnitERN5clang10ASTContextE_ZN9Annotator8generateERN5clang4SemaEb_ZNK5clang10LambdaExpr13capture_beginEv_ZNK5clang10LambdaExpr13isInitCaptureEPKNS_13La
mbdaCaptureE_ZNK5clang10LambdaExpr15getCallOperatorEv_ZNK5clang10LambdaExpr24getTemplateParameterListEv_ZN5clang7TypeLoc24getLocalAlignmentForTypeENS_8QualTypeE_ZNK5clang10LambdaExpr25getTrailingRequiresClauseEv_ZNK5clang10LambdaExpr7getBodyEv_ZN5clang4Stmt8childrenEv_ZN5clang12CapturedStmt15getCapturedDeclEv_ZN5clang15CXXForRangeStmt12getRangeInitEv_ZNK5clang26CXXRewrittenBinaryOperator17getDecomposedFormEv_ZN5clang16StmtIteratorBase6NextVAEv_ZNK5clang16StmtIteratorBase11GetDeclExprEv_ZN5clang16StmtIteratorBase8NextDeclEb_ZN9QtSupport21visitCXXConstructExprEPN5clang16CXXConstructExprE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25VisitOMPCopyprivateClauseEPNS_20OMPCopyprivateClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20VisitOMPCopyinClauseEPNS_15OMPCopyinClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE26VisitOMPFirstprivateClauseEPNS_21OMPFirstprivateClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20VisitOMPLinearClauseEPNS_15OMPLinearClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25VisitOMPNontemporalClauseEPNS_20OMPNontemporalClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseOMPDeclareSimdDeclAttrEPNS_22OMPDeclareSimdDeclAttrE_ZNK5clang12InitListExpr26isIdiomaticZeroInitializerERKNS_11LangOptionsE_ZN9Annotator23getDesignatorInlayHintsB5cxx11EPN5clang12InitListExprE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25VisitOMPLastprivateClauseEPNS_20OMPLastprivateClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE24TraverseTemplateArgumentERKNS_16TemplateArgumentE_ZN5clang12TemplateNameC1EPv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseTemplateNameENS_12TemplateNameE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseFunctionProtoTypeEPNS_17FunctionProtoTypeE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseNestedNameSpecifierEPNS_19NestedNameSpecifierE_ZNK5clang19DependentBitIntType14getNumBitsExprEv_ZNK5clang29SubstTemplateTypeParmPackType15getArgumentP
ackEv_ZNK5clang12TemplateName26getAsDependentTemplateNameEv_ZNK5clang12TemplateName26getAsQualifiedTemplateNameEv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE37TraverseTemplateSpecializationTypeLocENS_29TemplateSpecializationTypeLocE_ZNK5clang12TemplateName17getAsTemplateDeclEv_ZNK5clang8QualType11getAsStringB5cxx11Ev_ZNK5clang7TypeLoc9getEndLocEv_ZNK5clang7TypeLoc11getBeginLocEv_ZNK5clang7TagType7getDeclEv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraverseAutoTypeLocENS_11AutoTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseMemberPointerTypeLocENS_20MemberPointerTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE29TraverseMacroQualifiedTypeLocENS_21MacroQualifiedTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseFunctionProtoTypeLocENS_20FunctionProtoTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseFunctionNoProtoTypeLocENS_22FunctionNoProtoTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseElaboratedTypeLocENS_17ElaboratedTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27TraverseBlockPointerTypeLocENS_19BlockPointerTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE31TraverseBTFTagAttributedTypeLocENS_23BTFTagAttributedTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseAttributedTypeLocENS_17AttributedTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE21TraverseAtomicTypeLocENS_13AtomicTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseVariableArrayTypeLocENS_20VariableArrayTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseIncompleteArrayTypeLocENS_22IncompleteArrayTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE34TraverseDependentSizedArrayTypeLocENS_26DependentSizedArrayTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraverseConstantArrayTypeLocENS_20ConstantArrayTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseRValueReferenceTypeLocENS_22R
ValueReferenceTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE30TraverseLValueReferenceTypeLocENS_22LValueReferenceTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE22TraversePointerTypeLocENS_14PointerTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE19TraversePipeTypeLocENS_11PipeTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE20TraverseParenTypeLocENS_12ParenTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE28TraversePackExpansionTypeLocENS_20PackExpansionTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25TraverseObjCObjectTypeLocENS_17ObjCObjectTypeLocE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE32TraverseObjCObjectPointerTypeLocENS_24ObjCObjectPointerTypeLocE_ZNK5clang22NestedNameSpecifierLoc10getTypeLocEv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23VisitOMPReductionClauseEPNS_18OMPReductionClauseE_ZNK5clang11AutoTypeLoc18getConceptNameInfoEv_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE23TraverseCXXRecordHelperEPNS_13CXXRecordDeclE_ZNK5clang4Decl13getASTContextEv_ZNK5clang7VarDecl28isThisDeclarationADefinitionERNS_10ASTContextE_ZNK5clang13CXXRecordDecl29getTemplateSpecializationKindEv_ZNK5clang17ObjCInterfaceDecl22LoadExternalDefinitionEv_ZNK5clang7VarDecl29getTemplateSpecializationKindEv_ZNK5clang9FieldDecl21getInClassInitializerEv_ZN5clang11ParmVarDecl27getUninstantiatedDefaultArgEv_ZN5clang11OMPChildren11getChildrenEv_ZNK4llvm5APInt8toStringERNS_15SmallVectorImplIcEEjbbb_ZNK5clang10ASTContext20getObjCInterfaceTypeEPKNS_17ObjCInterfaceDeclEPS1__ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE27VisitOMPTaskReductionClauseEPNS_22OMPTaskReductionClauseE_ZN5clang19RecursiveASTVisitorI17BrowserASTVisitorE25VisitOMPInReductionClauseEPNS_20OMPInReductionClauseE_ZNK5clang23OMPUsesAllocatorsClause16getAllocatorDataEj_ZGVZNK5clang24TemplateTemplateParmDecl18getDefaultArgumentEvE7NoneLoc_ZZNK5clang24TemplateTemplateParmDecl18getDefaultArgumentEvE7NoneLoc__cxa_guard_acquire__cxa_guard_relea
se_ZNK5clang13CXXRecordDecl13isDerivedFromEPKS0__ZNK5clang8concepts15ExprRequirement21ReturnTypeRequirement17getTypeConstraintEv_ZNK5clang12FunctionDecl29getTemplateSpecializationInfoEv_ZNK5clang18CXXConstructorDecl10init_beginEv_ZN5clang4Decl19castFromDeclContextEPKNS_11DeclContextE_ZNK5clang13CXXRecordDecl21getLambdaCallOperatorEv_ZN4llvm2cl18GenericOptionValue6anchorEv_ZN4llvm2cl6Option6anchorEv_ZN4llvm2cl6Option13addOccurrenceEjNS_9StringRefES2_b_ZNK5clang18DiagnosticConsumer25IncludeInDiagnosticCountsEv_ZN5clang11ASTConsumer21HandleInterestingDeclENS_12DeclGroupRefE_ZN5clang11ASTConsumer33HandleTopLevelDeclInObjCContainerENS_12DeclGroupRefE_ZN5clang11ASTConsumer24HandleImplicitImportDeclEPNS_10ImportDeclE_ZN5clang17ASTFrontendAction13ExecuteActionEv_ZN5clang14FrontendAction22shouldEraseOutputFilesEv_ZN5clang14FrontendAction13EndSourceFileEv_ZN4llvm30VerifyDisableABIBreakingChecksE_ZN4llvm24DisableABIBreakingChecksE8IX'Vc,9T?U7mHr89I:'27271  7 7F p ?  7 H 8# x@ J} y J K v  c 77L23_.nMv(N<vO  M 2U4xP2Q2R2'S/2QL^27Yoccn727STgvUv#/V7`vxAWF^ 7? a q   4 !!!!W!!"r>"7k"7"P"#B#f#Wu##7#9D$XI$:N$S$X$%2i%2%2%_&2&&2+'2' (2$(J)2)*>* ,2B-7h.2 /@/r/:07m122#474757G77<878797:::::S ;H;;; <<K=[=g=D>U`>U>>>?]? m?y?@ AB3AMAUiAUAABB$BBBCS#CC DD(DDDDQEBEoFFFFUFUF1GBDG^GUzGUGHH(H4HHII]I)mIyIIIIJU-JUTJfJJKBKKUKULdLxLLLLBzM MUMUMMB`OtOOOPP)P5PP>>QJQ>Q*QQQQQQRR RRRR&R+R0R5R:R?RDRIRNRSRXR]RbRgRlRqRvR{RRRRRRRRRRRRRRRRRRRRRR SSSS S%S*S/S4S9S>SCSHSMSRSWS\SaSfSkSpSuSzSSSSSSSS@T?T?TI&UIZUUsUUUUUVU VzVUVUVV+WUDWU`Wa ;<t<<<==!>>>?  @ 3@ Z@ AABOCvCCCCD%ELEEF ;F!bF",G#G$G%H&H'H(J*J+K,*K-QK.4L/[L0:M1aM2FN3mN4N5N6N7 O80O9WOO:P;P<P=Q*     !0CNl 6# (Sd   '  ! %? GV    M%&%&%=% ((  &@ HQe+ :%M%. 2/ \%%4244Y4|6C < Oi/  %8% < Qm/ #4%H%_.KXy)Xr'[~#Uo'^{Jd~/Ru9Y#Y3ZC[S\c]s^_`abX@XTXh X|XXXX X@$XT@MeoC 5DUEFGH I7JOK{LK&J>KKMNcO!PsdxK7C QEiRSeKf KDITT dK(g0Kxh}KIi K U j K PWZIIPIPI=WdY0Jk8RlS7Q^U:\W79I: g ;kSlhi . 
j k?ykml g Ykulh< j R?`kmlK77A9FrMIR:YH^8<Vo7H89I:7W|P{8|H}c~ITm vvnv1IZ444xNb&r+92o7:?J229A2ivp 2"2Ozv pqv6>X ;@l@rE:7H89I:ed779H8I:77 7#A9I:Jc77 4No!X&:X79 I:tSrX:7 ;w7(M7ws|:%t%x5?MuRcov|wwx6?Pyzl?}v&Fn$mDT^"S_pGTSn k>rr:HOOo  {Yo%4'444F?h|y}G| ~D||6T[b|viv 'GpNVmt|,v{v vI  ( F d      3 : ? M R    3\$Y2xv@GLZ_7#*|18|o|vL '=O_G?T-k,4T[ccv u :EzbbZ%2}cs9l AbKuc%9E -9cA{+2>l 7h $IobbbZ !!!!!!"+"@"G"W"o"L ""S##c<$7(%7D%h&&&'| '8''''(0(J(R(\(`|((e(9)2t*****+K+U+ue+++++c++++, ,+,5,`U,f,e,,,u-7}------L .75/c/1)111L >1 ]11c272 2P3333|333333Z47=4f44{44 4<55564p6464.7A7rs7x77777W8I8o88|88888829<99V9|]9g99794+:: ;;;E;s;;O;O<q<{<<#=|*=B=%y===4=4>6>v_>v~?^??\?\?????@&@%z@X@:@@@@@@@@@@@@@AA AA/A7ABAteAwA*~AAAAAAnA0AAg AB BlB"B3B>BEBkLB WB(bB0mB<tBD{BdBhBTBhBDBHB<BB< BBBj BBBC|CCCC'C4CDCKCQCCXC`CkC tCl{C=CCCCCCCnCCD D!D(DDDpKD=RD`DpDwDDDDpDD=DDDDDEn,E3Eg ;EG BELIEUE`EkgEdnEyEEE E(E0E<EDEhETEhEHEDE<EFFF%F ,F3F+ :FAF HFOF[FjFCrFxF{~FkFF FFlF,FFFFFFFFFF G$G69GLOGmUGDpGmvGDG2G. 2/ \%%+ :%L%Z7 >rC:tv rB7 A;U<< <<<=? >$4@(=,?0>4@8z=<?@e>D{@H>L5@P?T@X9A\K`iGdqOhElMpbIttPxgB|LHOyFNIK9Q9BLQHO2FNrJ2QLCMdIPGOKQAoLHN8FMLJOeC4M}INO G JNKOBH F$K(*F,I0G4bL8dD<D@DDDHELEP?ETEX=\?t:?IAIt?:4Be<?IIACaD?:E6FA:?3?IG HI*GG*OG?IG3IcIG+HJOJJ\D4p-eQA}1m! ]    M   J/ p J  J  K O c s L~  \JS8J Yz KJ5 x z6 $zZzJvJJ^; M VJ C  't t J]7 z   e!J!!"""7##J$?${$&%%|&& I'' 'J2(o((P))*M+*@**+za++ *,,-r--. w.. @//0k0J011S1 2 22233Jt33J@4!5NI5Nq5N5N5N5N6N96Na6N6N6N6N7N)7NQ7Ny7N7N7N7N8N/8O<8OI8OV8Oc8Op8O}8O8O8O8O8O8O8O8O8O8O8O 9O9O&9O89PS9Pn9P9P9P9P9P9P:P+:PF:Pa:P|:P:P:P:P:P;P;P9;Pg<l<q<v<{<<<<<<<<<<<<<<<<<<<<<<<<<<<== ==== =%=*=/=4=9=>=C=H=M=R=W=\=a=f=k=p=u=z===========================>> >>>>>$>)>.>3>8>=>B>G>L>Q>V>[>`>e>j>o>t>y>~>>>>>>>>>>>>>>>>>>>>>>>>>>?? ?????#?(?-?2?7?)?*@u*A*B*C*D3+EX+F+G+H+I!,J],K,L,M,N-OD-Pi-Q-R-S.TI.Un.V.W.X/Y7/Zs/[/\/]/^50_^01`A1af1b1c@2de2e2f&3g3h3i4j4k4l5mr;n;<;o;p!<qL<r   s  Y  &$ (E,I0M)4)8o!<1@%D,HL PTX\s`dh l p"t+*xV"|.L&T-. 
a '- +$%/)Z)!1%,u.G'A.G0$+008  $   D    O$(P,0348<@DTHLPT$ X \< `Sd hGl pNt-x|mL  * q ` lt t &\/E(4, ' $(,%048<_@D_HLK-PT'1X)\.`"dh\lv0p@!t2x &|4X),2%q6(/ r74*f1")5v'`.5+O2#5A(I/h7)0!E4r&- 7,2x$ 6$((/, 074S*81<O"@A5D&H.L8PF7T<+Xn2\ $`5d`(hh/lpT4t)x|!-&-% .u^M(`F%!B R e x / I &JcNOP&JcNOPSx[QS~vG:??>II@@@i)b'gXNO1PDPLDXY=&|NOP@P}i&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP,@J}NOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP&JcNOP'YJYONP@izH[\\]*\]*\,]f*\]*0[`x]*<U***]*_ %_d[` %*t[IIa [%*m][ *5**n^_hl8 x4 $(,04|8X<T@HD$HLPpTTX \`dThlp8tx|pt \8<*X*H*@_P***b@p*c*_$n_~*S\\]*![@[X1=>v>@] *;eS]COWcdk}fIGghi>SYG]ghIaT[v@`ghJ@S *>j8dklmnoypqrs-tZuvwxy;zh{|}~  jA $(x,04L8<@DHRL4PTfX=\`dhlptxk |B ! 
}Tkz(,PGr  ?&:`*?#I)*^IG6 TG6 TG6 TG6 TG9 G9 G8 G9 ZS7LW f>><vS>\h>u?fS7Uj_5>v>4*qS}>:I>rDGp G9 " 0Gi*T G9 NGp  G9 G9 G9 G9 G9 G9 N  G~Su>>u:J?f?IQIS>>r m*,d4t(dx4pTb ~W;GC GKOSW [$_(c,g0k4o8s<w@{DHLPTX\`dhlptx| #'+/3;?C GKOSW [$_(,g0k48s<w@{DHFLPTX\`dhlptx|S, #'+37;?C GKOSW [$_(c,g0k4o8s<w@{DHLPTX\`dhlptx|W@8$+/37;?C GKOSW [$_(c,g0k4o8s<w@{DHLPTX\`dhlptx|8M #'+/37;?C GKSW [$_(,g0k4o8s<w@DHLPTX\`#dhlptx| #'+g*7;?C GKOSW $_(c,g0k4o8s<w@{DHLPTX\`dhlptx|!$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U!S7UTUx)^ S'3VSt>UUNZ>gBR\JS):FVSpUU?'U e S$7GS>>&U>UesHJtS SUU0p@ SUU$mhEJJuS S"U:U\yJJuS S"U:U\y$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U$4SGU`U(S;UTUp# 6O}UUHXbJS1 A~S>6W){?SUUBO>\$4SGU`U7JbS)SUU0z7JbS)SUU0d7JbS)SUU0dAQ[JS)9MTMr~SUU?$N/DOa) W:??NIUUaIEXbJS!)g:??VIaISUU4??[T?:UU4?IS(r-I[9";SOUiUBR\JS):FVSpUU?dS~SUU *PlxSUUiSS+7ZStUU>DP>]7CSSfUU)FR^nSUU.lv%S$0@SRUjU)^tSUU [gS>-9>F\b_(*<l**S U)U@[*c@_BR\JS):FVSpUU?8yJNO)PDP`SzUU2 FJNO PFJNO P+8Q c7 KJNO P/ CJNO P  2 FJNO P2 FJNO P[Qn SwBS=GiJ<IlS>>N4OIPw > >%BS=Go  JS>>K>mNOP &>kx>aSG&S@J>>/v >>S4S @S/9S-F>>/Va >#>`@S/9S-F>>/Va >#>`665E:F=}5uE#}dtTVuSURZ)555 555d5h5l 5p$5t(5x,5054585<5@5D5YH5L5P5T5X5\5 `5d5h5l5p5t5?x53|5H55555P5h5505555555L5 555Y55 5T55,5`555l5p5D5555}5X 5\5 5d5h5l 5$5B(5x,5|054585<5#@5L D5H5L5P5T5X5\5`5d5h5l5p5t5ux5t@\hxSUU)PSUU 0a?|: S3UPUg?II_?z:SU.UUk,?II_:??ZIIS,UDUlEXbJSG:??6IAI)S!U9Ud??{SJ"NOP>t>?{SJ"NOP>t>NRl{VFSbSpJ"mNOP>5C>l7VRSC|S0J!NOP6>hw>6*eS(ySUUH e>>u?::;?W?Ie 2AjS1I\?IJ,uNOP>+:>GI{> :?8?IS.ULU_?:1IT?II@ ??kI?:"S5URUpI?>I^IID4 Yx# }CS  !  >  I Z @ : T >] m >  S 9  1 (=0>8@HPX` (08@H)  ( 08@AHPX`hpx (08@HPX`hpx '(#0 3("0 $(!0MMM  M@0Mc8McHM PMP`MlhM8lxMM NM;M{M{MMMMNM(cMM N(M88M@M@PMخXMhMpMN.MHMMNJMN`MpNvMNM*NM(2(N0M@NHMXN`M(pNxMPNMvN,M؎NAMMMMMȲNUMNi M`70N}8M7HNPM:`NhM:xM8=MX=M+>M+>NMq>NMH>NM>NMp?N)M:? 
N>(M=?8NN@MC?PNdXMP?hNpMU?NMHZ?NM_?NMc?NM0m?NM@?NMX?N(MB(NA0MhB@NXHMBXNj`MBpNxMBNMxjCNMuCMCM@CNMCNMXINMKN M(K0N)8MKHMKPMK`NAhMOxNXM#PNnM PNMPNM0PNM0PNMQNMIQ N(MhQ8N @MwQPN XMQhN8pMHQNLMPQNbMQNwMБRNM RNM0RNMSNMS(N0MHS@NHM*aXN`M/apN.xMH3aNDMH]aN]MdaNvMlaNMtaNM(ya N Ma N Ma0 N8 MPaH N P Ma` N# h Max N9 Ma NP Mpa Nl Ma N Ma N Mb N MX/b N  Mh1b N ( MNb8 N @ MpobP N X Mch N" p MHd N5 Md NK M0(d N_ MX,d Nr M=d N Md N Md N  MPd( N 0 MGe@ N H MeX N ` Mep N x Me N" Mf N5 M8 f NL Mf N` Mf Nv M`f N  M-f N MAg0 N 8 MFgH N P M`Kg` N h Mh|hx N MXh N. Mh NG M8h N] Mxh Ns Mh=j N M@j N  MPj N ( Mj8 N @ MjP N X Mj N x 4H\p     $8L`t(<P d!x"#$%&'()*,+@,T-h.|/01234 5 6<7`8|9:;< =4>H?\@pABCD0FXGxHIJHLpPQ<RT$ P ` Xt Z [ \\ ] ^ _ ` a, b| c d e( ft g h  0 i j k lmxnopdqrsHtu p \xyz4{|}H~\p`$T A :S$l|0s0t1uD1y1z1{\2|2}2~ 3l333<4x444,5h5556X666 7H777788t888(9d999:T:::;D;;;;4<p<<<$=`===>P>>>?@?|???0@l@@@ A\AAABLBBBCl?l@`mAmB`Cl>@Hp?`Dl?@p@`El@@hpA`Fl}A@p}B`GlxB@p~C`HlyC@puD`IlpD@ppE`JlkE@8piF`KldF@pjG`LleG@XpiH`MldH@phI`NlcI@xpeJ`Ol`J@pZK`PlUK@pcL`Ql^L@(p\M`RlWM@paN`Sl\N@HprO`TlmO@pqP`UllP@hpnQ`VliQ@psR`WlnR@pxS`XlsS@pkT`YlfT@p pU`ZlkU@8p }V`[lxV@pxW`\lsW@XpmX`]lhX@ptY`^loY@xpsZ`_lnZ@pj[``le[@pg\`alb\@(ph]`blc]@po^`clj^@Hp~_`dly_@p ``elz`@hp"ra`flma@p$qb`gllb@p&hc`hlcc@p(]d`ilXd@p*ne`jlie@8p,uf`klpf@p.~g`llyg@Xp0{h`mlvh@p2i`nli@xp4j`ol~j@p6k`plk@p8l`qll@(p:m`rlm@p<n`sln@Hp>o`tlo@p@p`ulp@hpBzq`vluq@pD}r`wlxr@pF~sxlys@0pHtyl|t@pJzuzluu@PpLuv{lpv@pNjw|lew@ppP]x}lXx@pRTy~lOy@pTGzlBz@ pVT{lO{@pXM|lH|@@pZJ}lE}@p\K~lF~@`p^HlC@p`6l1@pb5l0@pdl@pf l@0phl@pjl@Ppllޅ@pnֆlц@pppƇl@pr`l@pt/l *@pv lf@pxl@qzY`ljT@q`|̊Мl]NJ@q~C0l>@ql@q2l$Уl@ q8;l5 6@ ql@q(l@qo@lXj@qHlX@8qHklXf@qH`lX@qH_l=Z@q0ɐl=Đ@@q0;@l<6@pq0l=@q0/l*@q8ly@ql @qlv{@q8 l@q0bl=]@@q0ܔlxה@pq`Tpl=O@q0ԕlϕ@qHFl=A@Hq0l=@xq0&l=!@q0Pl=@q0l=@ q0tl=o@8 q0l@h q]lX@"q̙lǙ@#q$l@8&q"~ply@Iq l@IqMlH@8Jql@Jq0l@XKql@Kqxlo@`Lqd`l_@Mq۝`l{֝@Oqjlxe@XQqݞ`lY؞@`RqPlK@SqPl@Tq"l@Uql@Vq@l@Wql~@Wql@0Xq{Plv@Xqlۢ@PYq_lZ@Yq`lݣ@pZq[lV@[qΤlɤ@[qOplJ@ \qxȥlå@\q* 
ld%@@]ql@_q@l@`q8~lly@aq @ll@bq ^llY@dq@Ũ l@@fq2mc-@Hgq(@mf@piqhm@jq ` m@hkq ` m@mq e m`@mqޫ mh٫@nqSmN@0pqެm٬@pq]mX@qqPέ@mQɭ@rq3m.@htqƮ`m@xvq$`m@(xq83@m .@`yq!B`mg=@zq#Gm B@zq%D m ?@{q'?"m:@0|q`)F#m A@|q+?0$m :@8}q-8P%m3@}q/Cp&m)>@p~qp1L*mG@q3Y0/m8T@q@5˺p3mƺ@q7E08m@@`q90=mx@q;Em|@ q=0Gm|@q?Hm<@qA_PImVZ@qCIm9޽@q E|Kmw@ؚqGNm@Мq I`Qm@q@K3Um.@0qM:Xm5@qOA[mp<@0qQ4_m/@@qS)@dm$@8qUgm@qWmm@ЫqYqm @hq[s@um n@q]m @q0_Fm A@q0aw8mhr@ qcmP@(qem@qg@m;@qif@m8a@xqxkxm8@qxmkm8f@hqxom @qqmHm m@qumm ؒmm@0 m,LmPm -pmp(@q ~xno  sndq>