/*
 * Include guard for this header (guard symbol: GRID_ASM_AV512_H).
 *
 * NOTE(review): the decimal number fused to the front of each of these two
 * lines looks like a leaked line number; with it in place the '#' is not the
 * first token on the line, so neither line is treated as a preprocessing
 * directive.  The matching #endif is not visible in this part of the file --
 * confirm it exists before stripping the prefixes here.
 */
28#ifndef GRID_ASM_AV512_H
29#define GRID_ASM_AV512_H
/*
 * ZLOADf / ZLOADd -- load followed by shuffle (single / double precision).
 *
 *   OFF, PTR : forwarded unchanged to VLOADf / VLOADd.
 *   ir       : destination operand of the load and source of the shuffle.
 *   ri       : destination operand of the shuffle.
 *
 * Expands to VLOADx(OFF,PTR,ir) immediately followed by VSHUFx(ir,ri).
 * VLOADx and VSHUFx are defined elsewhere; presumably each yields an
 * assembly string fragment like the other macros in this header -- confirm.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define ZLOADf(OFF,PTR,ri,ir) VLOADf(OFF,PTR,ir) VSHUFf(ir,ri)
#define ZLOADd(OFF,PTR,ri,ir) VLOADd(OFF,PTR,ir) VSHUFd(ir,ri)
/*
 * ZMULf / ZMULd -- two multiplies sharing the operand B.
 *
 *   Ari, Air     : first operands of the first and second multiply.
 *   B            : second operand of both multiplies.
 *   Criir, Ciirr : destinations of the first and second multiply.
 *
 * Expands to VMULx(Ari,B,Criir) followed by VMULx(Air,B,Ciirr).  VMULx is
 * defined elsewhere.  Judging by the names, the two results are the partial
 * products of a complex multiply that ZENDx later combines -- confirm.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define ZMULf(Ari,Air,B,Criir,Ciirr) VMULf(Ari,B,Criir) VMULf(Air,B,Ciirr)
#define ZMULd(Ari,Air,B,Criir,Ciirr) VMULd(Ari,B,Criir) VMULd(Air,B,Ciirr)
/*
 * ZMADDf / ZMADDd -- two multiply-adds sharing the operand B.
 *
 *   Ari, Air     : first operands of the first and second multiply-add.
 *   B            : second operand of both.
 *   Criir, Ciirr : third operand (presumably the accumulator) of the first
 *                  and second multiply-add.
 *
 * Expands to VMADDx(Ari,B,Criir) followed by VMADDx(Air,B,Ciirr).  VMADDx is
 * defined elsewhere; its exact accumulate semantics are not visible here.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define ZMADDf(Ari,Air,B,Criir,Ciirr) VMADDf(Ari,B,Criir) VMADDf(Air,B,Ciirr)
#define ZMADDd(Ari,Air,B,Criir,Ciirr) VMADDd(Ari,B,Criir) VMADDd(Air,B,Ciirr)
/*
 * ZENDf / ZENDd -- run both finishing steps back to back.
 *
 *   Criir, Ciirr, tmp : forwarded unchanged to ZEND1x and then to ZEND2x.
 *
 * Expands to ZEND1x(Criir,Ciirr,tmp) followed by ZEND2x(Criir,Ciirr,tmp).
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define ZENDf(Criir,Ciirr, tmp) ZEND1f(Criir,Ciirr, tmp) ZEND2f(Criir,Ciirr, tmp)
#define ZENDd(Criir,Ciirr, tmp) ZEND1d(Criir,Ciirr, tmp) ZEND2d(Criir,Ciirr, tmp)
/*
 * ZMULMEM2SPf / ZMULMEM2SPd / ZMADDMEM2SPf / ZMADDMEM2SPd
 *
 * Four macros sharing the parameter list
 *   (O, P, tmp, B, C, Briir, Biirr, Criir, Ciirr).
 * The body lines visible here invoke VMULMEMx / VMADDMEMx with (O,P,B,Biirr)
 * and (O,P,C,Ciirr); for ZMULMEM2SPd only the (O,P,B,Biirr) line is present.
 *
 * NOTE(review): these definitions look truncated.  tmp, Briir and Criir are
 * never referenced by any visible body line, and every visible body ends in a
 * line-continuation backslash that runs straight into the next '#define'.
 * The missing body lines must be restored from the authoritative copy of this
 * header before these macros are used.
 *
 * NOTE(review): the decimal number at the start of each line looks like a
 * leaked line number and prevents the '#define' lines from being recognised
 * as preprocessing directives -- confirm and strip when restoring the bodies.
 */
47#define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
49 VMULMEMf(O,P,B,Biirr) \
50 VMULMEMf(O,P,C,Ciirr) \
54#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
56 VMULMEMd(O,P,B,Biirr) \
61#define ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
63 VMADDMEMf(O,P,B,Biirr) \
64 VMADDMEMf(O,P,C,Ciirr) \
68#define ZMADDMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
70 VMADDMEMd(O,P,B,Biirr) \
71 VMADDMEMd(O,P,C,Ciirr) \
/*
 * ZEND1x / ZEND2x -- the two finishing instructions combined by ZENDx.
 *
 *   Criir : source of ZEND1x and destination of both instructions.
 *   Ciirr : source of ZEND2x (unused by ZEND1x).
 *   tmp   : unused by both.
 *
 * ZEND1x emits  vaddp{d,s} Criir{cdab} ,Criir,Criir{%k6}
 * ZEND2x emits  vsubp{d,s} Ciirr{cdab} ,Ciirr,Criir{%k7}
 *
 * Each expansion is a string literal ending in ";\n".  Register operands are
 * stringified with '#', so they must be passed as bare tokens (e.g. %zmm3).
 * The %k6 / %k7 write-masks must already hold the intended lane selection;
 * they are not set up here.  {cdab} is a source-swizzle modifier --
 * presumably it exchanges adjacent elements; confirm against the ISA manual.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define ZEND1d(Criir,Ciirr, tmp) "vaddpd " #Criir "{cdab} ," #Criir "," #Criir"{%k6}" ";\n"
#define ZEND2d(Criir,Ciirr, tmp) "vsubpd " #Ciirr "{cdab} ," #Ciirr "," #Criir"{%k7}" ";\n"
#define ZEND1f(Criir,Ciirr, tmp) "vaddps " #Criir "{cdab} ," #Criir "," #Criir"{%k6}" ";\n"
#define ZEND2f(Criir,Ciirr, tmp) "vsubps " #Ciirr "{cdab} ," #Ciirr "," #Criir"{%k7}" ";\n"
/*
 * VTIMESI{0,1,2}{f,d} -- three-step sequence writing into DEST from A and Z.
 *
 *   A    : source, read through the {cdab} swizzle.
 *   DEST : destination, written under a write-mask.
 *   Z    : second source operand (presumably a zeroed register -- confirm).
 *
 * Step 0 expands to nothing.
 * Step 1 emits  vaddp{s,d} A{cdab},Z,DEST{%k7}
 * Step 2 emits  vsubp{s,d} A{cdab},Z,DEST{%k6}
 *
 * Going by the name, steps 1 and 2 together are meant to produce i*A; that
 * depends on the contents of Z, %k6 and %k7, none of which are set here.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define VTIMESI0f(A,DEST, Z)
#define VTIMESI1f(A,DEST, Z) "vaddps " #A "{cdab}," #Z "," #DEST"{%k7}" ";\n"
#define VTIMESI2f(A,DEST, Z) "vsubps " #A "{cdab}," #Z "," #DEST"{%k6}" ";\n"

#define VTIMESI0d(A,DEST, Z)
#define VTIMESI1d(A,DEST, Z) "vaddpd " #A "{cdab}," #Z "," #DEST"{%k7}" ";\n"
#define VTIMESI2d(A,DEST, Z) "vsubpd " #A "{cdab}," #Z "," #DEST"{%k6}" ";\n"
/*
 * VTIMESMINUSI{0,1,2}{f,d} -- same shape as the VTIMESI family with the
 * add and subtract exchanged.
 *
 *   A    : source, read through the {cdab} swizzle.
 *   DEST : destination, written under a write-mask.
 *   Z    : second source operand (presumably a zeroed register -- confirm).
 *
 * Step 0 expands to nothing.
 * Step 1 emits  vsubp{s,d} A{cdab},Z,DEST{%k7}
 * Step 2 emits  vaddp{s,d} A{cdab},Z,DEST{%k6}
 *
 * Going by the name, steps 1 and 2 together are meant to produce -i*A; that
 * depends on the contents of Z, %k6 and %k7, none of which are set here.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define VTIMESMINUSI0f(A,DEST,Z)
#define VTIMESMINUSI1f(A,DEST,Z) "vsubps " #A "{cdab}," #Z "," #DEST"{%k7}" ";\n"
#define VTIMESMINUSI2f(A,DEST,Z) "vaddps " #A "{cdab}," #Z "," #DEST"{%k6}" ";\n"

#define VTIMESMINUSI0d(A,DEST,Z)
#define VTIMESMINUSI1d(A,DEST,Z) "vsubpd " #A "{cdab}," #Z "," #DEST"{%k7}" ";\n"
#define VTIMESMINUSI2d(A,DEST,Z) "vaddpd " #A "{cdab}," #Z "," #DEST"{%k6}" ";\n"
/*
 * VACCTIMESI{0,1,2}{f,d} -- accumulating variant: ACC is both the second
 * source and the destination.
 *
 *   A   : source, read through the {cdab} swizzle.
 *   ACC : accumulator, read and then written under a write-mask.
 *   tmp : unused by every step.
 *
 * Step 0 expands to nothing.
 * Step 1 emits  vaddp{s,d} A{cdab},ACC,ACC{%k7}
 * Step 2 emits  vsubp{s,d} A{cdab},ACC,ACC{%k6}
 *
 * Going by the name, steps 1 and 2 together are meant to compute
 * ACC += i*A; that depends on %k6 / %k7, which are not set here.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define VACCTIMESI0f(A,ACC,tmp)
#define VACCTIMESI1f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
#define VACCTIMESI2f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"

#define VACCTIMESI0d(A,ACC,tmp)
#define VACCTIMESI1d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
#define VACCTIMESI2d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"
/*
 * VACCTIMESMINUSI{0,1,2}{f,d} -- same shape as the VACCTIMESI family with
 * the add and subtract exchanged.
 *
 *   A   : source, read through the {cdab} swizzle.
 *   ACC : accumulator, read and then written under a write-mask.
 *   tmp : unused by every step.
 *
 * Step 0 expands to nothing.
 * Step 1 emits  vsubp{s,d} A{cdab},ACC,ACC{%k7}
 * Step 2 emits  vaddp{s,d} A{cdab},ACC,ACC{%k6}
 *
 * Going by the name, steps 1 and 2 together are meant to compute
 * ACC -= i*A; that depends on %k6 / %k7, which are not set here.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define VACCTIMESMINUSI0f(A,ACC,tmp)
#define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
#define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"

#define VACCTIMESMINUSI0d(A,ACC,tmp)
#define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
#define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"
/*
 * VPERM{0,1,2,3}f -- single-precision permutes from A into B.
 *
 *   A : source operand.
 *   B : destination operand.
 *
 * VPERM0f emits  vpermf32x4 $0x4e,A,B
 * VPERM1f emits  vpermf32x4 $0xb1,A,B
 * VPERM2f emits  vmovaps A{badc},B
 * VPERM3f emits  vmovaps A{cdab},B
 *
 * Each expansion is a string literal ending in ";\n".  Operands are
 * stringified with '#', so they must be passed as bare tokens (e.g. %zmm0).
 * Presumably the index runs from the coarsest exchange (0) to the finest
 * (3) -- confirm against the ISA manual's description of the immediates and
 * the {badc} / {cdab} swizzles.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define VPERM0f(A,B) "vpermf32x4 $0x4e," #A "," #B ";\n"
#define VPERM1f(A,B) "vpermf32x4 $0xb1," #A "," #B ";\n"
#define VPERM2f(A,B) "vmovaps " #A "{badc}," #B ";\n"
#define VPERM3f(A,B) "vmovaps " #A "{cdab}," #B ";\n"
/*
 * VPERM{0,1,2,3}d -- double-precision permutes from A into B.
 *
 *   A : source operand.
 *   B : destination operand.
 *
 * VPERM0d emits  vpermf32x4 $0x4e,A,B
 * VPERM1d emits  vmovapd A{badc},B
 * VPERM2d emits  vmovapd A{cdab},B
 * VPERM3d expands to VMOVd(A,B), which is defined elsewhere (presumably a
 *         plain register copy -- confirm).
 *
 * The first three expansions are string literals ending in ";\n".  Operands
 * are stringified with '#', so they must be passed as bare tokens.
 *
 * Fix: the stray numeric prefix in front of '#define' was removed so these
 * lines are recognised as preprocessing directives.
 */
#define VPERM0d(A,B) "vpermf32x4 $0x4e," #A "," #B ";\n"
#define VPERM1d(A,B) "vmovapd " #A "{badc}," #B ";\n"
#define VPERM2d(A,B) "vmovapd " #A "{cdab}," #B ";\n"
#define VPERM3d(A,B) VMOVd(A,B)
/*
 * VMULMEMd(O, A, B, DEST) -- defined with an empty replacement list, so every
 * invocation expands to nothing and all four arguments are discarded.
 *
 * NOTE(review): ZMULMEM2SPd in this header invokes this macro; with an empty
 * expansion that invocation contributes no instruction text.  Confirm that
 * an empty definition is what is wanted here.
 */
#define VMULMEMd(O,A,B,DEST) /* expands to nothing */