Grid 0.7.0
Intel512imci.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/simd/Intel512imci.h
6
7 Copyright (C) 2015
8
9Author: paboyle <paboyle@ph.ed.ac.uk>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#ifndef GRID_ASM_AV512_H
29#define GRID_ASM_AV512_H
30
32// Knights Corner specials
34
// Load-then-shuffle helpers (f = single precision, d = double precision).
// ZLOADx(OFF,PTR,ri,ir) expands to VLOADx(OFF,PTR,ir) immediately followed by
// VSHUFx(ir,ri). VLOADx / VSHUFx are not defined in this part of the file.
// NOTE(review): presumably `ir` receives the loaded vector and `ri` its
// shuffled copy -- confirm against the VLOAD*/VSHUF* definitions.
#define ZLOADf(OFF,PTR,ri,ir) \
  VLOADf(OFF,PTR,ir)          \
  VSHUFf(ir,ri)
#define ZLOADd(OFF,PTR,ri,ir) \
  VLOADd(OFF,PTR,ir)          \
  VSHUFd(ir,ri)
37
// Two-multiply helper (f = single precision, d = double precision).
// ZMULx(Ari,Air,B,Criir,Ciirr) expands to
//   VMULx(Ari,B,Criir) VMULx(Air,B,Ciirr)
// i.e. the same B is combined with Ari into Criir and with Air into Ciirr.
// VMULx is not defined in this part of the file.
#define ZMULf(Ari,Air,B,Criir,Ciirr) \
  VMULf(Ari,B,Criir)                 \
  VMULf(Air,B,Ciirr)
#define ZMULd(Ari,Air,B,Criir,Ciirr) \
  VMULd(Ari,B,Criir)                 \
  VMULd(Air,B,Ciirr)
40
// Two-multiply-add helper (f = single precision, d = double precision).
// ZMADDx(Ari,Air,B,Criir,Ciirr) expands to
//   VMADDx(Ari,B,Criir) VMADDx(Air,B,Ciirr)
// mirroring ZMULx but built on VMADDx, which is not defined in this part of
// the file.
#define ZMADDf(Ari,Air,B,Criir,Ciirr) \
  VMADDf(Ari,B,Criir)                 \
  VMADDf(Air,B,Ciirr)
#define ZMADDd(Ari,Air,B,Criir,Ciirr) \
  VMADDd(Ari,B,Criir)                 \
  VMADDd(Air,B,Ciirr)
43
// Finishing step: ZENDx(Criir,Ciirr,tmp) expands to ZEND1x followed by ZEND2x,
// each invoked with the same three arguments (f = single, d = double).
#define ZENDf(Criir,Ciirr, tmp) \
  ZEND1f(Criir,Ciirr, tmp)      \
  ZEND2f(Criir,Ciirr, tmp)
#define ZENDd(Criir,Ciirr, tmp) \
  ZEND1d(Criir,Ciirr, tmp)      \
  ZEND2d(Criir,Ciirr, tmp)
46
// Single-precision multiply of one memory operand (O,P) against two inputs.
// Expansion order:
//   VSHUFMEMf(O,P,tmp)      -- tmp is produced from the memory operand
//   VMULMEMf (O,P,B,Biirr)  -- memory operand with B -> Biirr
//   VMULMEMf (O,P,C,Ciirr)  -- memory operand with C -> Ciirr
//   VMULf    (tmp,B,Briir)  -- tmp with B -> Briir
//   VMULf    (tmp,C,Criir)  -- tmp with C -> Criir
// VSHUFMEMf / VMULMEMf / VMULf are not defined in this part of the file.
#define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
  VSHUFMEMf(O,P,tmp)                                     \
  VMULMEMf(O,P,B,Biirr)                                  \
  VMULMEMf(O,P,C,Ciirr)                                  \
  VMULf(tmp,B,Briir)                                     \
  VMULf(tmp,C,Criir)
53
// Double-precision multiply of one memory operand (O,P) against two inputs.
// Expansion order:
//   VSHUFMEMd(O,P,tmp)      -- tmp is produced from the memory operand
//   VMULMEMd (O,P,B,Biirr)  -- memory operand with B -> Biirr
//   VMULMEMd (O,P,C,Ciirr)  -- memory operand with C -> Ciirr
//   VMULd    (tmp,B,Briir)  -- tmp with B -> Briir
//   VMULd    (tmp,C,Criir)  -- tmp with C -> Criir
// VSHUFMEMd / VMULMEMd / VMULd are expected to be supplied by another header.
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
  VSHUFMEMd(O,P,tmp)                                     \
  VMULMEMd(O,P,B,Biirr)                                  \
  VMULMEMd(O,P,C,Ciirr)                                  \
  VMULd(tmp,B,Briir)                                     \
  VMULd(tmp,C,Criir)
60
// Single-precision multiply-add counterpart of ZMULMEM2SPf.
// Expansion order:
//   VSHUFMEMf(O,P,tmp)       -- tmp is produced from the memory operand
//   VMADDMEMf(O,P,B,Biirr)   -- memory operand with B -> Biirr
//   VMADDMEMf(O,P,C,Ciirr)   -- memory operand with C -> Ciirr
//   VMADDf   (tmp,B,Briir)   -- tmp with B -> Briir
//   VMADDf   (tmp,C,Criir)   -- tmp with C -> Criir
// VSHUFMEMf / VMADDMEMf / VMADDf are not defined in this part of the file.
#define ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
  VSHUFMEMf(O,P,tmp)                                      \
  VMADDMEMf(O,P,B,Biirr)                                  \
  VMADDMEMf(O,P,C,Ciirr)                                  \
  VMADDf(tmp,B,Briir)                                     \
  VMADDf(tmp,C,Criir)
67
// Double-precision multiply-add counterpart of ZMULMEM2SPd.
// Expansion order:
//   VSHUFMEMd(O,P,tmp)       -- tmp is produced from the memory operand
//   VMADDMEMd(O,P,B,Biirr)   -- memory operand with B -> Biirr
//   VMADDMEMd(O,P,C,Ciirr)   -- memory operand with C -> Ciirr
//   VMADDd   (tmp,B,Briir)   -- tmp with B -> Briir
//   VMADDd   (tmp,C,Criir)   -- tmp with C -> Criir
// VSHUFMEMd / VMADDMEMd / VMADDd are not defined in this part of the file.
#define ZMADDMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
  VSHUFMEMd(O,P,tmp)                                      \
  VMADDMEMd(O,P,B,Biirr)                                  \
  VMADDMEMd(O,P,C,Ciirr)                                  \
  VMADDd(tmp,B,Briir)                                     \
  VMADDd(tmp,C,Criir)
74
// Finishing instructions used by ZENDd / ZENDf. Each macro yields one
// assembler statement as a string literal, terminated by ";\n".
//   ZEND1x: add  Criir{cdab} to Criir, result in Criir, qualified by {%k6}
//   ZEND2x: sub  Ciirr{cdab} from/with Ciirr, result in Criir, qualified by {%k7}
// `tmp` is accepted for signature symmetry but never used; ZEND1x also ignores
// Ciirr.
// NOTE(review): {cdab} is presumably the element-pair swap swizzle and
// k6 / k7 write masks initialised elsewhere -- confirm against the ISA
// reference and the mask set-up code.
#define ZEND1d(Criir,Ciirr, tmp) "vaddpd " #Criir "{cdab} ," #Criir "," #Criir "{%k6};\n"
#define ZEND2d(Criir,Ciirr, tmp) "vsubpd " #Ciirr "{cdab} ," #Ciirr "," #Criir "{%k7};\n"

#define ZEND1f(Criir,Ciirr, tmp) "vaddps " #Criir "{cdab} ," #Criir "," #Criir "{%k6};\n"
#define ZEND2f(Criir,Ciirr, tmp) "vsubps " #Ciirr "{cdab} ," #Ciirr "," #Criir "{%k7};\n"
80
// VTIMESI{0,1,2}x(A,DEST,Z): three-stage sequence (f = single, d = double).
//   stage 0 expands to nothing;
//   stage 1 emits an add of A{cdab} and Z into DEST, qualified by {%k7};
//   stage 2 emits a sub of A{cdab} and Z into DEST, qualified by {%k6}.
// Each non-empty stage is one assembler statement string ending in ";\n".
// NOTE(review): the names suggest DEST = i * A with Z a zero register and
// k6 / k7 masks selecting alternate elements -- confirm against the callers.
#define VTIMESI0f(A,DEST, Z)
#define VTIMESI1f(A,DEST, Z) "vaddps " #A "{cdab}," #Z "," #DEST "{%k7};\n"
#define VTIMESI2f(A,DEST, Z) "vsubps " #A "{cdab}," #Z "," #DEST "{%k6};\n"

#define VTIMESI0d(A,DEST, Z)
#define VTIMESI1d(A,DEST, Z) "vaddpd " #A "{cdab}," #Z "," #DEST "{%k7};\n"
#define VTIMESI2d(A,DEST, Z) "vsubpd " #A "{cdab}," #Z "," #DEST "{%k6};\n"
88
// VTIMESMINUSI{0,1,2}x(A,DEST,Z): same shape as VTIMESIx with add and sub
// exchanged (f = single, d = double).
//   stage 0 expands to nothing;
//   stage 1 emits a sub of A{cdab} and Z into DEST, qualified by {%k7};
//   stage 2 emits an add of A{cdab} and Z into DEST, qualified by {%k6}.
// NOTE(review): the names suggest DEST = -i * A -- confirm against the callers.
#define VTIMESMINUSI0f(A,DEST,Z)
#define VTIMESMINUSI1f(A,DEST,Z) "vsubps " #A "{cdab}," #Z "," #DEST "{%k7};\n"
#define VTIMESMINUSI2f(A,DEST,Z) "vaddps " #A "{cdab}," #Z "," #DEST "{%k6};\n"

#define VTIMESMINUSI0d(A,DEST,Z)
#define VTIMESMINUSI1d(A,DEST,Z) "vsubpd " #A "{cdab}," #Z "," #DEST "{%k7};\n"
#define VTIMESMINUSI2d(A,DEST,Z) "vaddpd " #A "{cdab}," #Z "," #DEST "{%k6};\n"
96
// VACCTIMESI{0,1,2}x(A,ACC,tmp): accumulate variant of VTIMESIx; ACC is both
// the second source and the destination (f = single, d = double).
//   stage 0 expands to nothing;
//   stage 1 emits an add of A{cdab} and ACC into ACC, qualified by {%k7};
//   stage 2 emits a sub of A{cdab} and ACC into ACC, qualified by {%k6}.
// `tmp` is accepted but never used.
// NOTE(review): the names suggest ACC = ACC + i * A -- confirm against the
// callers.
#define VACCTIMESI0f(A,ACC,tmp)
#define VACCTIMESI1f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC "{%k7};\n"
#define VACCTIMESI2f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC "{%k6};\n"

#define VACCTIMESI0d(A,ACC,tmp)
#define VACCTIMESI1d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC "{%k7};\n"
#define VACCTIMESI2d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC "{%k6};\n"
104
// Acc = Acc - i A
// VACCTIMESMINUSI{0,1,2}x(A,ACC,tmp): same shape as VACCTIMESIx with add and
// sub exchanged (f = single, d = double).
//   stage 0 expands to nothing;
//   stage 1 emits a sub of A{cdab} and ACC into ACC, qualified by {%k7};
//   stage 2 emits an add of A{cdab} and ACC into ACC, qualified by {%k6}.
// `tmp` is accepted but never used.
#define VACCTIMESMINUSI0f(A,ACC,tmp)
#define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC "{%k7};\n"
#define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC "{%k6};\n"

#define VACCTIMESMINUSI0d(A,ACC,tmp)
#define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC "{%k7};\n"
#define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC "{%k6};\n"
113
// Permute helpers VPERM{0..3}x(A,B): each yields one assembler statement
// string (ending in ";\n") that reads A and writes B.
//
// Immediates used with vpermf32x4:
//   ((1<<6)|(0<<4)|(3<<2)|(2)) == 0100,1110 = 0x4e
//   ((2<<6)|(3<<4)|(0<<2)|(1)) == 1011,0001 = 0xb1
//
// Single precision: levels 0 and 1 use vpermf32x4 with 0x4e / 0xb1, levels 2
// and 3 use vmovaps with the {badc} / {cdab} source swizzle.
#define VPERM0f(A,B) "vpermf32x4 $0x4e," #A "," #B ";\n"
#define VPERM1f(A,B) "vpermf32x4 $0xb1," #A "," #B ";\n"
#define VPERM2f(A,B) "vmovaps " #A "{badc}," #B ";\n"
#define VPERM3f(A,B) "vmovaps " #A "{cdab}," #B ";\n"

// Double precision: level 0 uses vpermf32x4 with 0x4e, levels 1 and 2 use
// vmovapd with the {badc} / {cdab} swizzle, and level 3 simply forwards to
// VMOVd(A,B), which is not defined in this part of the file.
#define VPERM0d(A,B) "vpermf32x4 $0x4e," #A "," #B ";\n"
#define VPERM1d(A,B) "vmovapd " #A "{badc}," #B ";\n"
#define VPERM2d(A,B) "vmovapd " #A "{cdab}," #B ";\n"
#define VPERM3d(A,B) VMOVd(A,B)
126
127#endif
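// NOTE(review): the text that followed #endif here -- an empty-bodied
// "#define VMULMEMd(O, A, B, DEST)" and a bare "B" -- looks like
// documentation cross-reference residue rather than part of this header.
// VMULMEMd is expected to come from another header; confirm and drop this note.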