mpy.idef (54669B)
/*
 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Multiply Instructions
 */

/*
 * STD_SP_MODES emits eight Q6INSN entries named
 * M2_<TAG>_{hh,hl,lh,ll}_{s0,s1}.
 *
 *   - hh/hl/lh/ll pick the halfword of Rs and of Rt that is multiplied:
 *     "h" reads fGETHALF(1, ...), "l" reads fGETHALF(0, ...).
 *   - The _s1 variants append ":<<1" to the syntax string and wrap the
 *     product in fSCALE(1, ...); the _s0 variants use the product as is.
 *
 * Every entry has the semantics
 *     DST = SATSEM(RNDSEM(ACCSEM <product>));
 * where <product> is SEM(half of Rs, half of Rt).
 *
 * Parameters:
 *   TAG     pasted into the instruction tag
 *   OPER    leading part of the syntax string (e.g. "Rd32=mpy")
 *   ATR     attribute argument passed through to Q6INSN (may be empty)
 *   DST     destination lvalue
 *   ACCSEM  accumulate prefix placed before the product (may be empty)
 *   SEM     16x16 multiply helper
 *   OSEM    syntax suffix string appended after the operands (may be empty)
 *   SATSEM  saturation wrapper
 *   RNDSEM  rounding wrapper
 *
 * Q6INSN and the f* helpers must be defined by the including file.
 */
#define STD_SP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM,     ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(1,RsV),fGETHALF(1,RtV))));})\
Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(1,RtV)))));})\
Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM,     ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(1,RsV),fGETHALF(0,RtV))));})\
Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(0,RtV)))));})\
Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM,     ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(0,RsV),fGETHALF(1,RtV))));})\
Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(1,RtV)))));})\
Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM,     ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(0,RsV),fGETHALF(0,RtV))));})\
Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(0,RtV)))));})

/*****************************************************/ 34 /* multiply 16x16->32 signed instructions */ 35 /*****************************************************/ 36 STD_SP_MODES(mpy_acc, "Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS, ,fPASS,fPASS) 37 STD_SP_MODES(mpy_nac, "Rx32-=mpy", ,RxV,RxV- ,fMPY16SS, ,fPASS,fPASS) 38 STD_SP_MODES(mpy_acc_sat,"Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS,":sat" ,fSAT, fPASS) 39 STD_SP_MODES(mpy_nac_sat,"Rx32-=mpy", ,RxV,RxV- ,fMPY16SS,":sat" ,fSAT, fPASS) 40 STD_SP_MODES(mpy, "Rd32=mpy", ,RdV, ,fMPY16SS, ,fPASS,fPASS) 41 STD_SP_MODES(mpy_sat, "Rd32=mpy", ,RdV, ,fMPY16SS,":sat" ,fSAT, fPASS) 42 STD_SP_MODES(mpy_rnd, "Rd32=mpy", ,RdV, ,fMPY16SS,":rnd" ,fPASS,fROUND) 43 STD_SP_MODES(mpy_sat_rnd,"Rd32=mpy", ,RdV, ,fMPY16SS,":rnd:sat",fSAT, fROUND) 44 STD_SP_MODES(mpyd_acc, "Rxx32+=mpy",,RxxV,RxxV+ ,fMPY16SS, ,fPASS,fPASS) 45 STD_SP_MODES(mpyd_nac, "Rxx32-=mpy",,RxxV,RxxV- ,fMPY16SS, ,fPASS,fPASS) 46 STD_SP_MODES(mpyd, "Rdd32=mpy", ,RddV, ,fMPY16SS, ,fPASS,fPASS) 47 STD_SP_MODES(mpyd_rnd, "Rdd32=mpy", ,RddV, ,fMPY16SS,":rnd" ,fPASS,fROUND) 48 49 50 /*****************************************************/ 51 /* multiply 16x16->32 unsigned instructions */ 52 /*****************************************************/ 53 #define STD_USP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\ 54 Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(1,RtV))));})\ 55 Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(1,RtV)))));})\ 56 Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(0,RtV))));})\ 57 Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(0,RtV)))));})\ 58 Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( 
fGETUHALF(0,RsV),fGETUHALF(1,RtV))));})\ 59 Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(1,RtV)))));})\ 60 Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(0,RtV))));})\ 61 Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(0,RtV)))));}) 62 63 STD_USP_MODES(mpyu_acc, "Rx32+=mpyu", ,RxV,RxV+ ,fMPY16UU, ,fPASS,fPASS) 64 STD_USP_MODES(mpyu_nac, "Rx32-=mpyu", ,RxV,RxV- ,fMPY16UU, ,fPASS,fPASS) 65 STD_USP_MODES(mpyu, "Rd32=mpyu", ATTRIBS() ,RdV, ,fMPY16UU, ,fPASS,fPASS) 66 STD_USP_MODES(mpyud_acc, "Rxx32+=mpyu",,RxxV,RxxV+,fMPY16UU, ,fPASS,fPASS) 67 STD_USP_MODES(mpyud_nac, "Rxx32-=mpyu",,RxxV,RxxV-,fMPY16UU, ,fPASS,fPASS) 68 STD_USP_MODES(mpyud, "Rdd32=mpyu", ATTRIBS() ,RddV, ,fMPY16UU, ,fPASS,fPASS) 69 70 /**********************************************/ 71 /* mpy 16x#s8->32 */ 72 /**********************************************/ 73 74 Q6INSN(M2_mpysip,"Rd32=+mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 75 "32-bit Multiply by unsigned immediate", 76 { fIMMEXT(uiV); RdV=RsV*uiV; }) 77 78 Q6INSN(M2_mpysin,"Rd32=-mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 79 "32-bit Multiply by unsigned immediate, negate result", 80 { RdV=RsV*-uiV; }) 81 82 Q6INSN(M2_macsip,"Rx32+=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 83 "32-bit Multiply-Add by unsigned immediate", 84 { fIMMEXT(uiV); RxV=RxV + (RsV*uiV);}) 85 86 Q6INSN(M2_macsin,"Rx32-=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 87 "32-bit Multiply-Subtract by unsigned immediate", 88 { fIMMEXT(uiV); RxV=RxV - (RsV*uiV);}) 89 90 91 /**********************************************/ 92 /* multiply/mac 32x32->64 instructions */ 93 /**********************************************/ 94 Q6INSN(M2_dpmpyss_s0, "Rdd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32SS(RsV,RtV);}) 95 
/* Signed 32x32 multiply accumulated into / subtracted from the 64-bit Rxx. */
Q6INSN(M2_dpmpyss_acc_s0,"Rxx32+=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32SS(RsV,RtV);})
Q6INSN(M2_dpmpyss_nac_s0,"Rxx32-=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32SS(RsV,RtV);})

/* Unsigned forms: both operands go through fCAST4u before fMPY32UU. */
Q6INSN(M2_dpmpyuu_s0,    "Rdd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
Q6INSN(M2_dpmpyuu_acc_s0,"Rxx32+=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
Q6INSN(M2_dpmpyuu_nac_s0,"Rxx32-=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})


/******************************************************/
/* multiply/mac 32x32->32 (upper) instructions        */
/******************************************************/
/*
 * The plain forms shift the product right by 32; the ":<<1" forms
 * shift right by 31 instead.  The ":rnd" form adds 0x80000000 to the
 * product before the shift by 32.
 */
Q6INSN(M2_mpy_up,        "Rd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>32;})
Q6INSN(M2_mpy_up_s1,     "Rd32=mpy(Rs32,Rt32):<<1", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>31;})
Q6INSN(M2_mpy_up_s1_sat, "Rd32=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RdV=fSAT(fMPY32SS(RsV,RtV)>>31);})
Q6INSN(M2_mpyu_up,       "Rd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV))>>32;})
Q6INSN(M2_mpysu_up,      "Rd32=mpysu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SU(RsV,fCAST4u(RtV))>>32;})
Q6INSN(M2_dpmpyss_rnd_s0,"Rd32=mpy(Rs32,Rt32):rnd", ATTRIBS(),"Multiply 32x32",{RdV=(fMPY32SS(RsV,RtV)+fCONSTLL(0x80000000))>>32;})

/* Accumulating upper forms: Rx goes through fSE32_64 before the add/sub. */
Q6INSN(M4_mac_up_s1_sat, "Rx32+=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT(  (fSE32_64(RxV)) + (fMPY32SS(RsV,RtV)>>31));})
Q6INSN(M4_nac_up_s1_sat, "Rx32-=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT(  (fSE32_64(RxV)) - (fMPY32SS(RsV,RtV)>>31));})


/**********************************************/
/* 32x32->32 multiply (lower)                 */
/**********************************************/

Q6INSN(M2_mpyi,"Rd32=mpyi(Rs32,Rt32)",ATTRIBS(),
"Multiply Integer",
{ RdV=RsV*RtV;})

Q6INSN(M2_maci,"Rx32+=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Multiply-Accumulate Integer",
{ RxV=RxV + RsV*RtV;})

Q6INSN(M2_mnaci,"Rx32-=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Multiply-Neg-Accumulate Integer",
{ RxV=RxV - RsV*RtV;})

/****** WHY ARE THESE IN MPY.IDEF? **********/

Q6INSN(M2_acci,"Rx32+=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Add with accumulate",
{ RxV=RxV + RsV + RtV;})

Q6INSN(M2_accii,"Rx32+=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
"Add with accumulate",
{ fIMMEXT(siV); RxV=RxV + RsV + siV;})

Q6INSN(M2_nacci,"Rx32-=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
"Add with neg accumulate",
{ RxV=RxV - (RsV + RtV);})

Q6INSN(M2_naccii,"Rx32-=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
"Add with neg accumulate",
{ fIMMEXT(siV); RxV=RxV - (RsV + siV);})

/* Note the operand order in the syntax: Rt is added, Rs is subtracted. */
Q6INSN(M2_subacc,"Rx32+=sub(Rt32,Rs32)",ATTRIBS(A_ARCHV2),
"Sub with accumulate",
{ RxV=RxV + RtV - RsV;})


/*
 * Multiply-and-add forms.
 * NOTE(review): all five entries share the description string
 * "Mpy by immed and add immed", although M4_mpyrr_addr multiplies and adds
 * registers only, M4_mpyri_addr* add a register, and M4_mpyrr_addi
 * multiplies two registers.  The strings are left unchanged here; confirm
 * whether any consumer depends on them before rewording.
 */

/* Ry is both a multiply operand and the destination. */
Q6INSN(M4_mpyrr_addr,"Ry32=add(Ru32,mpyi(Ry32,Rs32))",ATTRIBS(),
"Mpy by immed and add immed",
{ RyV = RuV + RsV*RyV;})

Q6INSN(M4_mpyri_addr_u2,"Rd32=add(Ru32,mpyi(#u6:2,Rs32))",ATTRIBS(),
"Mpy by immed and add immed",
{ RdV = RuV + RsV*uiV;})

Q6INSN(M4_mpyri_addr,"Rd32=add(Ru32,mpyi(Rs32,#u6))",ATTRIBS(),
"Mpy by immed and add immed",
{ fIMMEXT(uiV); RdV = RuV + RsV*uiV;})


/* Two immediates: #u6 (uiV) is the addend, #U6 (UiV) is the multiplier. */
Q6INSN(M4_mpyri_addi,"Rd32=add(#u6,mpyi(Rs32,#U6))",ATTRIBS(),
"Mpy by immed and add immed",
{ fIMMEXT(uiV); RdV = uiV + RsV*UiV;})


Q6INSN(M4_mpyrr_addi,"Rd32=add(#u6,mpyi(Rs32,Rt32))",ATTRIBS(),
"Mpy by immed and add immed",
{ fIMMEXT(uiV); RdV = uiV + RsV*RtV;})



/**********************************************/
/* vector mac 2x[16x16 -> 32]                 */
/**********************************************/ 201 202 #undef vmac_sema 203 #define vmac_sema(N)\ 204 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\ 205 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 206 } 207 Q6INSN(M2_vmpy2s_s0,"Rdd32=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 208 Q6INSN(M2_vmpy2s_s1,"Rdd32=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 209 210 211 #undef vmac_sema 212 #define vmac_sema(N)\ 213 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\ 214 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 215 } 216 Q6INSN(M2_vmac2s_s0,"Rxx32+=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 217 Q6INSN(M2_vmac2s_s1,"Rxx32+=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 218 219 #undef vmac_sema 220 #define vmac_sema(N)\ 221 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\ 222 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\ 223 } 224 Q6INSN(M2_vmpy2su_s0,"Rdd32=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 225 Q6INSN(M2_vmpy2su_s1,"Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 226 227 228 #undef vmac_sema 229 #define vmac_sema(N)\ 230 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\ 231 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\ 232 } 233 Q6INSN(M2_vmac2su_s0,"Rxx32+=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 234 Q6INSN(M2_vmac2su_s1,"Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 235 236 237 238 #undef vmac_sema 239 #define vmac_sema(N)\ 240 { fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\ 241 
fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) + 0x8000))));\ 242 } 243 Q6INSN(M2_vmpy2s_s0pack,"Rd32=vmpyh(Rs32,Rt32):rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0)) 244 Q6INSN(M2_vmpy2s_s1pack,"Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(1)) 245 246 247 #undef vmac_sema 248 #define vmac_sema(N)\ 249 { fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)));\ 250 fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)));\ 251 } 252 Q6INSN(M2_vmac2,"Rxx32+=vmpyh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0)) 253 254 #undef vmac_sema 255 #define vmac_sema(N)\ 256 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\ 257 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\ 258 } 259 Q6INSN(M2_vmpy2es_s0,"Rdd32=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 260 Q6INSN(M2_vmpy2es_s1,"Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 261 262 #undef vmac_sema 263 #define vmac_sema(N)\ 264 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\ 265 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\ 266 } 267 Q6INSN(M2_vmac2es_s0,"Rxx32+=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 268 Q6INSN(M2_vmac2es_s1,"Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 269 270 #undef vmac_sema 271 #define vmac_sema(N)\ 272 { fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)));\ 273 fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)));\ 274 } 275 Q6INSN(M2_vmac2es,"Rxx32+=vmpyeh(Rss32,Rtt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0)) 276 277 278 279 280 /********************************************************/ 281 /* vrmpyh, aka Big Mac, aka Mac 
Daddy, aka Mac-ac-ac-ac */ 282 /* vector mac 4x[16x16] + 64 ->64 */ 283 /********************************************************/ 284 285 286 #undef vmac_sema 287 #define vmac_sema(N)\ 288 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\ 289 + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\ 290 + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\ 291 + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 292 } 293 Q6INSN(M2_vrmac_s0,"Rxx32+=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 294 295 #undef vmac_sema 296 #define vmac_sema(N)\ 297 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\ 298 + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\ 299 + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\ 300 + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 301 } 302 Q6INSN(M2_vrmpy_s0,"Rdd32=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 303 304 305 306 /******************************************************/ 307 /* vector dual macs. just like complex */ 308 /******************************************************/ 309 310 311 /* With round&pack */ 312 #undef dmpy_sema 313 #define dmpy_sema(N)\ 314 { fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \ 315 fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))) + 0x8000))));\ 316 fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \ 317 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))) + 0x8000))));\ 318 } 319 Q6INSN(M2_vdmpyrs_s0,"Rd32=vdmpy(Rss32,Rtt32):rnd:sat",ATTRIBS(), "vector dual mac w/ round&pack",dmpy_sema(0)) 320 Q6INSN(M2_vdmpyrs_s1,"Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"vector dual mac w/ round&pack",dmpy_sema(1)) 321 322 323 324 325 326 /******************************************************/ 327 /* vector byte multiplies */ 328 /******************************************************/ 329 330 331 Q6INSN(M5_vrmpybuu,"Rdd32=vrmpybu(Rss32,Rtt32)",ATTRIBS(), 332 "vector dual mpy bytes", 333 { 334 
fSETWORD(0,RddV,(fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) + 335 fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) + 336 fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) + 337 fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV)))); 338 fSETWORD(1,RddV,(fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) + 339 fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) + 340 fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) + 341 fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV)))); 342 }) 343 344 Q6INSN(M5_vrmacbuu,"Rxx32+=vrmpybu(Rss32,Rtt32)",ATTRIBS(), 345 "vector dual mac bytes", 346 { 347 fSETWORD(0,RxxV,(fGETWORD(0,RxxV) + 348 fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) + 349 fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) + 350 fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) + 351 fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV)))); 352 fSETWORD(1,RxxV,(fGETWORD(1,RxxV) + 353 fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) + 354 fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) + 355 fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) + 356 fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV)))); 357 }) 358 359 360 Q6INSN(M5_vrmpybsu,"Rdd32=vrmpybsu(Rss32,Rtt32)",ATTRIBS(), 361 "vector dual mpy bytes", 362 { 363 fSETWORD(0,RddV,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 364 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) + 365 fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 366 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))); 367 fSETWORD(1,RddV,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 368 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) + 369 fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 370 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))); 371 }) 372 373 Q6INSN(M5_vrmacbsu,"Rxx32+=vrmpybsu(Rss32,Rtt32)",ATTRIBS(), 374 "vector dual mac bytes", 375 { 376 fSETWORD(0,RxxV,(fGETWORD(0,RxxV) + 377 fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 378 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) + 379 fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 380 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))); 381 
fSETWORD(1,RxxV,(fGETWORD(1,RxxV) + 382 fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 383 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) + 384 fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 385 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))); 386 }) 387 388 389 Q6INSN(M5_vmpybuu,"Rdd32=vmpybu(Rs32,Rt32)",ATTRIBS(), 390 "vector mpy bytes", 391 { 392 fSETHALF(0,RddV,(fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV)))); 393 fSETHALF(1,RddV,(fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV)))); 394 fSETHALF(2,RddV,(fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV)))); 395 fSETHALF(3,RddV,(fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV)))); 396 }) 397 398 Q6INSN(M5_vmpybsu,"Rdd32=vmpybsu(Rs32,Rt32)",ATTRIBS(), 399 "vector mpy bytes", 400 { 401 fSETHALF(0,RddV,(fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV)))); 402 fSETHALF(1,RddV,(fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV)))); 403 fSETHALF(2,RddV,(fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV)))); 404 fSETHALF(3,RddV,(fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV)))); 405 }) 406 407 408 Q6INSN(M5_vmacbuu,"Rxx32+=vmpybu(Rs32,Rt32)",ATTRIBS(), 409 "vector mac bytes", 410 { 411 fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV)))); 412 fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV)))); 413 fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV)))); 414 fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV)))); 415 }) 416 417 Q6INSN(M5_vmacbsu,"Rxx32+=vmpybsu(Rs32,Rt32)",ATTRIBS(), 418 "vector mac bytes", 419 { 420 fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV)))); 421 fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV)))); 422 fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV)))); 423 fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV)))); 424 }) 425 426 427 428 Q6INSN(M5_vdmpybsu,"Rdd32=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(), 429 "vector quad mpy bytes", 430 { 431 
fSETHALF(0,RddV,fSATN(16,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 432 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV))))); 433 fSETHALF(1,RddV,fSATN(16,(fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 434 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))))); 435 fSETHALF(2,RddV,fSATN(16,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 436 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV))))); 437 fSETHALF(3,RddV,fSATN(16,(fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 438 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))))); 439 }) 440 441 442 Q6INSN(M5_vdmacbsu,"Rxx32+=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(), 443 "vector quad mac bytes", 444 { 445 fSETHALF(0,RxxV,fSATN(16,(fGETHALF(0,RxxV) + 446 fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 447 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV))))); 448 fSETHALF(1,RxxV,fSATN(16,(fGETHALF(1,RxxV) + 449 fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 450 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))))); 451 fSETHALF(2,RxxV,fSATN(16,(fGETHALF(2,RxxV) + 452 fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 453 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV))))); 454 fSETHALF(3,RxxV,fSATN(16,(fGETHALF(3,RxxV) + 455 fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 456 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))))); 457 }) 458 459 460 461 /* Full version */ 462 #undef dmpy_sema 463 #define dmpy_sema(N)\ 464 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \ 465 fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\ 466 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \ 467 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\ 468 } 469 Q6INSN(M2_vdmacs_s0,"Rxx32+=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0)) 470 Q6INSN(M2_vdmacs_s1,"Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1)) 471 472 #undef dmpy_sema 473 #define dmpy_sema(N)\ 474 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \ 475 
fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\ 476 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \ 477 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\ 478 } 479 480 Q6INSN(M2_vdmpys_s0,"Rdd32=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0)) 481 Q6INSN(M2_vdmpys_s1,"Rdd32=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1)) 482 483 484 485 /******************************************************/ 486 /* complex multiply/mac with */ 487 /* real&imag are packed together and always saturated */ 488 /* to protect against overflow. */ 489 /******************************************************/ 490 491 #undef cmpy_sema 492 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 493 { fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 494 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))) + 0x8000))));\ 495 fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 496 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\ 497 } 498 Q6INSN(M2_cmpyrs_s0,"Rd32=cmpy(Rs32,Rt32):rnd:sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-)) 499 Q6INSN(M2_cmpyrs_s1,"Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-)) 500 501 502 Q6INSN(M2_cmpyrsc_s0,"Rd32=cmpy(Rs32,Rt32*):rnd:sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 503 Q6INSN(M2_cmpyrsc_s1,"Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 504 505 506 #undef cmpy_sema 507 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 508 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 509 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\ 510 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 511 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 512 } 513 Q6INSN(M2_cmacs_s0,"Rxx32+=cmpy(Rs32,Rt32):sat",ATTRIBS(), 
"Complex Multiply",cmpy_sema(0,+,-)) 514 Q6INSN(M2_cmacs_s1,"Rxx32+=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-)) 515 516 /* EJP: Need mac versions w/ CONJ T? */ 517 Q6INSN(M2_cmacsc_s0,"Rxx32+=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 518 Q6INSN(M2_cmacsc_s1,"Rxx32+=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 519 520 521 #undef cmpy_sema 522 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 523 { fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 524 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\ 525 fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 526 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 527 } 528 529 Q6INSN(M2_cmpys_s0,"Rdd32=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-)) 530 Q6INSN(M2_cmpys_s1,"Rdd32=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-)) 531 532 Q6INSN(M2_cmpysc_s0,"Rdd32=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 533 Q6INSN(M2_cmpysc_s1,"Rdd32=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 534 535 536 537 #undef cmpy_sema 538 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 539 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 540 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))))));\ 541 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 542 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))))));\ 543 } 544 Q6INSN(M2_cnacs_s0,"Rxx32-=cmpy(Rs32,Rt32):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,+,-)) 545 Q6INSN(M2_cnacs_s1,"Rxx32-=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,+,-)) 546 547 /* EJP: need CONJ versions? 
*/ 548 Q6INSN(M2_cnacsc_s0,"Rxx32-=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 549 Q6INSN(M2_cnacsc_s1,"Rxx32-=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 550 551 552 /******************************************************/ 553 /* complex interpolation */ 554 /* Given a pair of complex values, scale by a,b, sum */ 555 /* Saturate/shift1 and round/pack */ 556 /******************************************************/ 557 558 #undef vrcmpys_sema 559 #define vrcmpys_sema(N,INWORD) \ 560 { fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \ 561 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\ 562 fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \ 563 fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\ 564 } 565 566 567 568 Q6INSN(M2_vrcmpys_s1_h,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV))) 569 Q6INSN(M2_vrcmpys_s1_l,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV))) 570 571 #undef vrcmpys_sema 572 #define vrcmpys_sema(N,INWORD) \ 573 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \ 574 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\ 575 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \ 576 fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\ 577 } 578 579 580 581 Q6INSN(M2_vrcmpys_acc_s1_h,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV))) 582 Q6INSN(M2_vrcmpys_acc_s1_l,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV))) 583 584 #undef vrcmpys_sema 
585 #define vrcmpys_sema(N,INWORD) \ 586 { fSETHALF(1,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \ 587 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD))) + 0x8000)));\ 588 fSETHALF(0,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \ 589 fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD))) + 0x8000)));\ 590 } 591 592 Q6INSN(M2_vrcmpys_s1rp_h,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV))) 593 Q6INSN(M2_vrcmpys_s1rp_l,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV))) 594 595 /**************************************************************/ 596 /* mixed mode 32x16 vector dual multiplies */ 597 /* */ 598 /**************************************************************/ 599 600 /* SIGNED 32 x SIGNED 16 */ 601 602 603 #undef mixmpy_sema 604 #define mixmpy_sema(N)\ 605 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)) ); \ 606 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)) ); \ 607 } 608 Q6INSN(M2_mmacls_s0,"Rxx32+=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 609 Q6INSN(M2_mmacls_s1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 610 611 #undef mixmpy_sema 612 #define mixmpy_sema(N)\ 613 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16) )); \ 614 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16 ))); \ 615 } 616 Q6INSN(M2_mmachs_s0,"Rxx32+=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 617 Q6INSN(M2_mmachs_s1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision 
Multiply",mixmpy_sema(1)) 618 619 #undef mixmpy_sema 620 #define mixmpy_sema(N)\ 621 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)); \ 622 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)); \ 623 } 624 Q6INSN(M2_mmpyl_s0,"Rdd32=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 625 Q6INSN(M2_mmpyl_s1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 626 627 #undef mixmpy_sema 628 #define mixmpy_sema(N)\ 629 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16)); \ 630 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16)); \ 631 } 632 Q6INSN(M2_mmpyh_s0,"Rdd32=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 633 Q6INSN(M2_mmpyh_s1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 634 635 636 /* With rounding */ 637 638 #undef mixmpy_sema 639 #define mixmpy_sema(N)\ 640 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)) ); \ 641 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)) ); \ 642 } 643 Q6INSN(M2_mmacls_rs0,"Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 644 Q6INSN(M2_mmacls_rs1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 645 646 #undef mixmpy_sema 647 #define mixmpy_sema(N)\ 648 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16) )); \ 649 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16 ))); \ 650 } 651 Q6INSN(M2_mmachs_rs0,"Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 652 
Q6INSN(M2_mmachs_rs1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 653 654 #undef mixmpy_sema 655 #define mixmpy_sema(N)\ 656 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)); \ 657 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)); \ 658 } 659 Q6INSN(M2_mmpyl_rs0,"Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 660 Q6INSN(M2_mmpyl_rs1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 661 662 #undef mixmpy_sema 663 #define mixmpy_sema(N)\ 664 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16)); \ 665 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16)); \ 666 } 667 Q6INSN(M2_mmpyh_rs0,"Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 668 Q6INSN(M2_mmpyh_rs1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 669 670 671 #undef mixmpy_sema 672 #define mixmpy_sema(DEST,EQUALS,N)\ 673 { DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)));} 674 675 Q6INSN(M4_vrmpyeh_s0,"Rdd32=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0)) 676 Q6INSN(M4_vrmpyeh_s1,"Rdd32=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1)) 677 Q6INSN(M4_vrmpyeh_acc_s0,"Rxx32+=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0)) 678 Q6INSN(M4_vrmpyeh_acc_s1,"Rxx32+=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1)) 679 680 #undef mixmpy_sema 681 #define mixmpy_sema(DEST,EQUALS,N)\ 682 { DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)));} 683 
684 Q6INSN(M4_vrmpyoh_s0,"Rdd32=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0)) 685 Q6INSN(M4_vrmpyoh_s1,"Rdd32=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1)) 686 Q6INSN(M4_vrmpyoh_acc_s0,"Rxx32+=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0)) 687 Q6INSN(M4_vrmpyoh_acc_s1,"Rxx32+=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1)) 688 689 690 691 692 693 694 #undef mixmpy_sema 695 #define mixmpy_sema(N,H,RND)\ 696 { RdV = fSAT((fSCALE(N,fMPY3216SS(RsV,fGETHALF(H,RtV)))RND)>>16); \ 697 } 698 Q6INSN(M2_hmmpyl_rs1,"Rd32=mpy(Rs32,Rt.L32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,+0x8000)) 699 Q6INSN(M2_hmmpyh_rs1,"Rd32=mpy(Rs32,Rt.H32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,+0x8000)) 700 Q6INSN(M2_hmmpyl_s1,"Rd32=mpy(Rs32,Rt.L32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,)) 701 Q6INSN(M2_hmmpyh_s1,"Rd32=mpy(Rs32,Rt.H32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,)) 702 703 704 705 706 707 708 709 710 711 /* SIGNED 32 x UNSIGNED 16 */ 712 713 #undef mixmpy_sema 714 #define mixmpy_sema(N)\ 715 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)) ); \ 716 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)) ); \ 717 } 718 Q6INSN(M2_mmaculs_s0,"Rxx32+=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 719 Q6INSN(M2_mmaculs_s1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 720 721 #undef mixmpy_sema 722 #define mixmpy_sema(N)\ 723 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16) )); \ 724 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + 
((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16 ))); \ 725 } 726 Q6INSN(M2_mmacuhs_s0,"Rxx32+=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 727 Q6INSN(M2_mmacuhs_s1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 728 729 #undef mixmpy_sema 730 #define mixmpy_sema(N)\ 731 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)); \ 732 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)); \ 733 } 734 Q6INSN(M2_mmpyul_s0,"Rdd32=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 735 Q6INSN(M2_mmpyul_s1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 736 737 #undef mixmpy_sema 738 #define mixmpy_sema(N)\ 739 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16)); \ 740 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16)); \ 741 } 742 Q6INSN(M2_mmpyuh_s0,"Rdd32=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 743 Q6INSN(M2_mmpyuh_s1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 744 745 746 /* With rounding */ 747 748 #undef mixmpy_sema 749 #define mixmpy_sema(N)\ 750 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)) ); \ 751 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)) ); \ 752 } 753 Q6INSN(M2_mmaculs_rs0,"Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 754 Q6INSN(M2_mmaculs_rs1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 755 756 #undef mixmpy_sema 757 #define mixmpy_sema(N)\ 758 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + 
((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16) )); \ 759 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16 ))); \ 760 } 761 Q6INSN(M2_mmacuhs_rs0,"Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 762 Q6INSN(M2_mmacuhs_rs1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 763 764 #undef mixmpy_sema 765 #define mixmpy_sema(N)\ 766 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)); \ 767 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)); \ 768 } 769 Q6INSN(M2_mmpyul_rs0,"Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 770 Q6INSN(M2_mmpyul_rs1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 771 772 #undef mixmpy_sema 773 #define mixmpy_sema(N)\ 774 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16)); \ 775 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16)); \ 776 } 777 Q6INSN(M2_mmpyuh_rs0,"Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 778 Q6INSN(M2_mmpyuh_rs1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 779 780 781 /**************************************************************/ 782 /* complex mac with full 64-bit accum - no sat, no shift */ 783 /* either do real or accum, never both */ 784 /**************************************************************/ 785 786 Q6INSN(M2_vrcmaci_s0,"Rxx32+=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Imaginary", 787 { 788 RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \ 789 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 790 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \ 791 
fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 792 }) 793 794 Q6INSN(M2_vrcmacr_s0,"Rxx32+=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Real", 795 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \ 796 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 797 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \ 798 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 799 }) 800 801 Q6INSN(M2_vrcmaci_s0c,"Rxx32+=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Imaginary", 802 { 803 RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \ 804 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 805 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \ 806 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 807 }) 808 809 Q6INSN(M2_vrcmacr_s0c,"Rxx32+=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Real", 810 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \ 811 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 812 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \ 813 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 814 }) 815 816 Q6INSN(M2_cmaci_s0,"Rxx32+=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Imaginary", 817 { 818 RxxV = RxxV + fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \ 819 fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)); 820 }) 821 822 Q6INSN(M2_cmacr_s0,"Rxx32+=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Real", 823 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \ 824 fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)); 825 }) 826 827 828 Q6INSN(M2_vrcmpyi_s0,"Rdd32=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Imaginary", 829 { 830 RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \ 831 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 832 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \ 833 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 834 }) 835 836 Q6INSN(M2_vrcmpyr_s0,"Rdd32=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Real", 837 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \ 838 
fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 839 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \ 840 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 841 }) 842 843 Q6INSN(M2_vrcmpyi_s0c,"Rdd32=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Imaginary", 844 { 845 RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \ 846 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 847 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \ 848 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 849 }) 850 851 Q6INSN(M2_vrcmpyr_s0c,"Rdd32=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Real", 852 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \ 853 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 854 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \ 855 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 856 }) 857 858 Q6INSN(M2_cmpyi_s0,"Rdd32=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Imaginary", 859 { 860 RddV = fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \ 861 fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)); 862 }) 863 864 Q6INSN(M2_cmpyr_s0,"Rdd32=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Real", 865 { RddV = fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \ 866 fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)); 867 }) 868 869 870 /**************************************************************/ 871 /* Complex mpy/mac with 2x32 bit accum, sat, shift */ 872 /* 32x16 real or imag */ 873 /**************************************************************/ 874 875 #if 1 876 877 Q6INSN(M4_cmpyi_wh,"Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 878 { 879 RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV)) 880 + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV)) 881 + 0x4000)>>15); 882 }) 883 884 885 Q6INSN(M4_cmpyr_wh,"Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 886 { 887 RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV)) 888 - fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV)) 889 + 0x4000)>>15); 890 
}) 891 892 Q6INSN(M4_cmpyi_whc,"Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 893 { 894 RdV = fSAT( ( fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV)) 895 - fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV)) 896 + 0x4000)>>15); 897 }) 898 899 900 Q6INSN(M4_cmpyr_whc,"Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 901 { 902 RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV)) 903 + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV)) 904 + 0x4000)>>15); 905 }) 906 907 908 #endif 909 910 /**************************************************************/ 911 /* Vector mpy/mac with 2x32 bit accum, sat, shift */ 912 /* either do real or imag, never both */ 913 /**************************************************************/ 914 915 #undef VCMPYSEMI 916 #define VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) \ 917 fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \ 918 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV))))); \ 919 fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \ 920 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV))))); \ 921 922 #undef VCMPYSEMR 923 #define VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) \ 924 fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \ 925 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))))); \ 926 fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \ 927 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))))); \ 928 929 930 #undef VCMPYIR 931 #define VCMPYIR(TAGBASE,DSTSYN,DSTVAL,ACCSEM,ACCVAL0,ACCVAL1,SHIFTSYN,SHIFTVAL,SATSYN,SATVAL) \ 932 Q6INSN(M2_##TAGBASE##i,DSTSYN ACCSEM "vcmpyi(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \ 933 "Vector Complex Multiply Imaginary", { VCMPYSEMI(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) \ 934 Q6INSN(M2_##TAGBASE##r,DSTSYN ACCSEM "vcmpyr(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \ 935 "Vector Complex Multiply Imaginary", { 
VCMPYSEMR(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) 936 937 938 VCMPYIR(vcmpy_s0_sat_,"Rdd32",RddV,"=",,,"",0,":sat",fSAT) 939 VCMPYIR(vcmpy_s1_sat_,"Rdd32",RddV,"=",,,":<<1",1,":sat",fSAT) 940 VCMPYIR(vcmac_s0_sat_,"Rxx32",RxxV,"+=",fGETWORD(0,RxxV) + ,fGETWORD(1,RxxV) + ,"",0,":sat",fSAT) 941 942 943 /********************************************************************** 944 * Rotation -- by 0, 90, 180, or 270 means mult by 1, J, -1, -J * 945 *********************************************************************/ 946 947 Q6INSN(S2_vcrotate,"Rdd32=vcrotate(Rss32,Rt32)",ATTRIBS(A_ARCHV2),"Rotate complex value by multiple of PI/2", 948 { 949 fHIDE(size1u_t tmp;) 950 tmp = fEXTRACTU_RANGE(RtV,1,0); 951 if (tmp == 0) { /* No rotation */ 952 fSETHALF(0,RddV,fGETHALF(0,RssV)); 953 fSETHALF(1,RddV,fGETHALF(1,RssV)); 954 } else if (tmp == 1) { /* Multiply by -J */ 955 fSETHALF(0,RddV,fGETHALF(1,RssV)); 956 fSETHALF(1,RddV,fSATH(-fGETHALF(0,RssV))); 957 } else if (tmp == 2) { /* Multiply by J */ 958 fSETHALF(0,RddV,fSATH(-fGETHALF(1,RssV))); 959 fSETHALF(1,RddV,fGETHALF(0,RssV)); 960 } else { /* Multiply by -1 */ 961 fHIDE(if (tmp != 3) fatal("C is broken");) 962 fSETHALF(0,RddV,fSATH(-fGETHALF(0,RssV))); 963 fSETHALF(1,RddV,fSATH(-fGETHALF(1,RssV))); 964 } 965 tmp = fEXTRACTU_RANGE(RtV,3,2); 966 if (tmp == 0) { /* No rotation */ 967 fSETHALF(2,RddV,fGETHALF(2,RssV)); 968 fSETHALF(3,RddV,fGETHALF(3,RssV)); 969 } else if (tmp == 1) { /* Multiply by -J */ 970 fSETHALF(2,RddV,fGETHALF(3,RssV)); 971 fSETHALF(3,RddV,fSATH(-fGETHALF(2,RssV))); 972 } else if (tmp == 2) { /* Multiply by J */ 973 fSETHALF(2,RddV,fSATH(-fGETHALF(3,RssV))); 974 fSETHALF(3,RddV,fGETHALF(2,RssV)); 975 } else { /* Multiply by -1 */ 976 fHIDE(if (tmp != 3) fatal("C is broken");) 977 fSETHALF(2,RddV,fSATH(-fGETHALF(2,RssV))); 978 fSETHALF(3,RddV,fSATH(-fGETHALF(3,RssV))); 979 } 980 }) 981 982 983 Q6INSN(S4_vrcrotate_acc,"Rxx32+=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes", 984 { 985 
fHIDE(int i; int tmpr; int tmpi; unsigned int control;) 986 fHIDE(int sumr; int sumi;) 987 sumr = 0; 988 sumi = 0; 989 control = fGETUBYTE(uiV,RtV); 990 for (i = 0; i < 8; i += 2) { 991 tmpr = fGETBYTE(i ,RssV); 992 tmpi = fGETBYTE(i+1,RssV); 993 switch (control & 3) { 994 case 0: /* No Rotation */ 995 sumr += tmpr; 996 sumi += tmpi; 997 break; 998 case 1: /* Multiply by -J */ 999 sumr += tmpi; 1000 sumi -= tmpr; 1001 break; 1002 case 2: /* Multiply by J */ 1003 sumr -= tmpi; 1004 sumi += tmpr; 1005 break; 1006 case 3: /* Multiply by -1 */ 1007 sumr -= tmpr; 1008 sumi -= tmpi; 1009 break; 1010 fHIDE(default: fatal("C is broken!");) 1011 } 1012 control = control >> 2; 1013 } 1014 fSETWORD(0,RxxV,fGETWORD(0,RxxV) + sumr); 1015 fSETWORD(1,RxxV,fGETWORD(1,RxxV) + sumi); 1016 }) 1017 1018 Q6INSN(S4_vrcrotate,"Rdd32=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes", 1019 { 1020 fHIDE(int i; int tmpr; int tmpi; unsigned int control;) 1021 fHIDE(int sumr; int sumi;) 1022 sumr = 0; 1023 sumi = 0; 1024 control = fGETUBYTE(uiV,RtV); 1025 for (i = 0; i < 8; i += 2) { 1026 tmpr = fGETBYTE(i ,RssV); 1027 tmpi = fGETBYTE(i+1,RssV); 1028 switch (control & 3) { 1029 case 0: /* No Rotation */ 1030 sumr += tmpr; 1031 sumi += tmpi; 1032 break; 1033 case 1: /* Multiply by -J */ 1034 sumr += tmpi; 1035 sumi -= tmpr; 1036 break; 1037 case 2: /* Multiply by J */ 1038 sumr -= tmpi; 1039 sumi += tmpr; 1040 break; 1041 case 3: /* Multiply by -1 */ 1042 sumr -= tmpr; 1043 sumi -= tmpi; 1044 break; 1045 fHIDE(default: fatal("C is broken!");) 1046 } 1047 control = control >> 2; 1048 } 1049 fSETWORD(0,RddV,sumr); 1050 fSETWORD(1,RddV,sumi); 1051 }) 1052 1053 1054 Q6INSN(S2_vcnegh,"Rdd32=vcnegh(Rss32,Rt32)",ATTRIBS(),"Conditional Negate halfwords", 1055 { 1056 fHIDE(int i;) 1057 for (i = 0; i < 4; i++) { 1058 if (fGETBIT(i,RtV)) { 1059 fSETHALF(i,RddV,fSATH(-fGETHALF(i,RssV))); 1060 } else { 1061 fSETHALF(i,RddV,fGETHALF(i,RssV)); 1062 } 1063 } 1064 }) 1065 1066 
/*
 * Reducing conditional negate: each of the four halfwords of Rss is added to
 * Rxx, negated first when the matching bit of Rt is set.  Unlike S2_vcnegh
 * there is no fSATH on the negated value here.
 */
Q6INSN(S2_vrcnegh,"Rxx32+=vrcnegh(Rss32,Rt32)",ATTRIBS(),"Vector Reduce Conditional Negate halfwords",
{
    fHIDE(int i;)
    for (i = 0; i < 4; i++) {
        if (fGETBIT(i,RtV)) {
            RxxV += -fGETHALF(i,RssV);
        } else {
            RxxV += fGETHALF(i,RssV);
        }
    }
})


/**********************************************************************
 * Finite-field multiplies.  Written by David Hoyle                   *
 *********************************************************************/

/*
 * Carry-less multiply: for every set bit i of y, x shifted left by i is
 * XORed into prod.  x and prod are 64-bit so the 32x32 product is kept whole.
 */
Q6INSN(M4_pmpyw,"Rdd32=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
{
    fHIDE(int i; unsigned int y;)
    fHIDE(unsigned long long x; unsigned long long prod;)
    x = fGETUWORD(0, RsV);
    y = fGETUWORD(0, RtV);

    prod = 0;
    for(i=0; i < 32; i++) {
        if((y >> i) & 1) prod ^= (x << i);
    }
    RddV = prod;
})

/*
 * Two independent 16x16 carry-less multiplies (low halfwords -> prod0, high
 * halfwords -> prod1).  The results are interleaved in Rdd: halfword 0 and 2
 * hold the low and high half of prod0, halfword 1 and 3 those of prod1.
 */
Q6INSN(M4_vpmpyh,"Rdd32=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
{
    fHIDE(int i; unsigned int x0; unsigned int x1;)
    fHIDE(unsigned int y0; unsigned int y1;)
    fHIDE(unsigned int prod0; unsigned int prod1;)

    x0 = fGETUHALF(0, RsV);
    x1 = fGETUHALF(1, RsV);
    y0 = fGETUHALF(0, RtV);
    y1 = fGETUHALF(1, RtV);

    prod0 = prod1 = 0;
    for(i=0; i < 16; i++) {
        if((y0 >> i) & 1) prod0 ^= (x0 << i);
        if((y1 >> i) & 1) prod1 ^= (x1 << i);
    }
    fSETHALF(0,RddV,fGETUHALF(0,prod0));
    fSETHALF(1,RddV,fGETUHALF(0,prod1));
    fSETHALF(2,RddV,fGETUHALF(1,prod0));
    fSETHALF(3,RddV,fGETUHALF(1,prod1));
})

/* Accumulating form of M4_pmpyw: the product is XORed into Rxx. */
Q6INSN(M4_pmpyw_acc,"Rxx32^=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
{
    fHIDE(int i; unsigned int y;)
    fHIDE(unsigned long long x; unsigned long long prod;)
    x = fGETUWORD(0, RsV);
    y = fGETUWORD(0, RtV);

    prod = 0;
    for(i=0; i < 32; i++) {
        if((y >> i) & 1) prod ^= (x << i);
    }
    RxxV ^= prod;
})

/* Accumulating form of M4_vpmpyh: each interleaved halfword is XORed into Rxx. */
Q6INSN(M4_vpmpyh_acc,"Rxx32^=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
{
    fHIDE(int i; unsigned int x0; unsigned int x1;)
    fHIDE(unsigned int y0; unsigned int y1;)
    fHIDE(unsigned int prod0; unsigned int prod1;)

    x0 = fGETUHALF(0, RsV);
    x1 = fGETUHALF(1, RsV);
    y0 = fGETUHALF(0, RtV);
    y1 = fGETUHALF(1, RtV);

    prod0 = prod1 = 0;
    for(i=0; i < 16; i++) {
        if((y0 >> i) & 1) prod0 ^= (x0 << i);
        if((y1 >> i) & 1) prod1 ^= (x1 << i);
    }
    fSETHALF(0,RxxV,fGETUHALF(0,RxxV) ^ fGETUHALF(0,prod0));
    fSETHALF(1,RxxV,fGETUHALF(1,RxxV) ^ fGETUHALF(0,prod1));
    fSETHALF(2,RxxV,fGETUHALF(2,RxxV) ^ fGETUHALF(1,prod0));
    fSETHALF(3,RxxV,fGETUHALF(3,RxxV) ^ fGETUHALF(1,prod1));
})


/* V70: TINY CORE */

/*
 * CMPY64: emits M7_<TAG> ("Rdd32=") and M7_<TAG>_acc ("Rxx32+=").
 *
 *   NAME, OPERAND1 - pieces of the syntax string (mnemonic, second operand).
 *   DESC           - suffix of the description string ("Real" / "Imag").
 *   OP             - "+" or "-" placed between the two 32x32 products.
 *   W0..W3         - word indices: the result is
 *                    Rss.w[W0]*Rtt.w[W1] OP Rss.w[W2]*Rtt.w[W3].
 */
#define CMPY64(TAG,NAME,DESC,OPERAND1,OP,W0,W1,W2,W3) \
Q6INSN(M7_##TAG,"Rdd32=" NAME "(Rss32," OPERAND1 ")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 64-bit " DESC, { RddV = (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})\
Q6INSN(M7_##TAG##_acc,"Rxx32+=" NAME "(Rss32,"OPERAND1")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply-Accumulate 64-bit " DESC, { RxxV += (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})

CMPY64(dcmpyrw, "cmpyrw","Real","Rtt32" ,-,0,0,1,1)
CMPY64(dcmpyrwc,"cmpyrw","Real","Rtt32*",+,0,0,1,1)
CMPY64(dcmpyiw, "cmpyiw","Imag","Rtt32" ,+,0,1,1,0)
CMPY64(dcmpyiwc,"cmpyiw","Imag","Rtt32*",-,1,0,0,1)

/*
 * CMPY128: 32-bit result form (":<<1:sat").  The two 32x32 products are
 * widened with fCAST8S_16S, combined with OP(tmp128, acc128) (fADD128 or
 * fSUB128), shifted right by 31 with fSHIFTR128, narrowed with fCAST16S_8S
 * and passed through fSATW.
 *
 * NOTE(review): the description string is fixed to "... result real" and is
 * therefore also attached to the cmpyiw forms; changing it needs a new
 * macro parameter.
 */
#define CMPY128(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
Q6INSN(M7_##TAG,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \
{ \
    fHIDE(size16s_t acc128;)\
    fHIDE(size16s_t tmp128;)\
    fHIDE(size8s_t acc64;)\
    tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
    acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
    acc128 = OP(tmp128,acc128);\
    acc128 = fSHIFTR128(acc128, 31);\
    acc64 = fCAST16S_8S(acc128);\
    RdV = fSATW(acc64);\
})


CMPY128(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
CMPY128(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
CMPY128(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
CMPY128(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)


/*
 * CMPY128RND: as CMPY128, with the constant 0x40000000 (half of 1 << 31)
 * added to the 128-bit sum before the shift right by 31 -- the ":rnd" of the
 * syntax.  The generated tags carry an "_rnd" suffix.
 *
 * NOTE(review): same fixed "... result real" description as CMPY128.
 */
#define CMPY128RND(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
Q6INSN(M7_##TAG##_rnd,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:rnd:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \
{ \
    fHIDE(size16s_t acc128;)\
    fHIDE(size16s_t tmp128;)\
    fHIDE(size16s_t const128;)\
    fHIDE(size8s_t acc64;)\
    tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
    acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
    const128 = fCAST8S_16S(fCONSTLL(0x40000000));\
    acc128 = OP(tmp128,acc128);\
    acc128 = fADD128(acc128,const128);\
    acc128 = fSHIFTR128(acc128, 31);\
    acc64 = fCAST16S_8S(acc128);\
    RdV = fSATW(acc64);\
})

CMPY128RND(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
CMPY128RND(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
CMPY128RND(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
CMPY128RND(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)